diff --git "a/ckpt/en-ja.do02.ado00/train.log" "b/ckpt/en-ja.do02.ado00/train.log" new file mode 100644--- /dev/null +++ "b/ckpt/en-ja.do02.ado00/train.log" @@ -0,0 +1,12972 @@ +{'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': 'simple', 'log_file': 'chkpt/en-ja.do02.ado00/train.log', 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': 'wmt23', 'azureml_logging': False, 'seed': 0, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 8, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 8, 'distributed_num_procs': 8, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': 'tcp://localhost:31441', 'distributed_port': 31441, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'pytorch_ddp', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 8, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': 16384, 'batch_size': None, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 100000, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': 16384, 'batch_size_valid': None, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 60000, 'stop_time_hours': 0.0, 'clip_norm': 1.0, 'sentence_avg': False, 'update_freq': [4], 'lr': [0.001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False, 'debug_param_names': False}, 'checkpoint': {'_name': None, 'save_dir': 'chkpt/en-ja.do02.ado00', 'restore_file': 'checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 100000, 'save_interval_updates': 1000, 'keep_interval_updates': 10, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 8}, 'generation': {'_name': None, 'beam': 5, 'beam_mt': 0, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'max_len_a_mt': 0.0, 'max_len_b_mt': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'lenpen_mt': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': Namespace(no_progress_bar=False, log_interval=100, log_format='simple', log_file='chkpt/en-ja.do02.ado00/train.log', aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project='wmt23', azureml_logging=False, seed=0, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=8, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='label_smoothed_cross_entropy', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='inverse_sqrt', scoring='bleu', task='translation', num_workers=0, skip_invalid_size_inputs_valid_test=False, max_tokens=16384, batch_size=None, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=100000, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=16384, batch_size_valid=None, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, grouped_shuffling=False, update_epoch_batch_itr=False, update_ordered_indices_seed=False, distributed_world_size=8, distributed_num_procs=8, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, gradient_as_bucket_view=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_base_algorithm='localsgd', localsgd_frequency=3, nprocs_per_node=8, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, not_fsdp_flatten_parameters=False, arch='transformer_vaswani_wmt_en_de_big', max_epoch=0, max_update=60000, stop_time_hours=0, clip_norm=1.0, sentence_avg=False, update_freq=[4], lr=[0.001], stop_min_lr=-1.0, use_bmuf=False, skip_remainder_batch=False, debug_param_names=False, save_dir='chkpt/en-ja.do02.ado00', restore_file='checkpoint_last.pt', continue_once=None, finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=100000, save_interval_updates=1000, keep_interval_updates=10, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=True, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, store_ema=False, ema_decay=0.9999, ema_start_update=0, ema_seed_model=None, ema_update_freq=1, ema_fp32=False, data='binarized/en-ja/', source_lang=None, target_lang=None, load_alignments=False, left_pad_source=True, left_pad_target=False, upsample_primary=-1, truncate_source=False, num_batch_buckets=0, eval_bleu=False, eval_bleu_args='{}', eval_bleu_detok='space', eval_bleu_detok_args='{}', eval_tokenized_bleu=False, eval_bleu_remove_bpe=None, eval_bleu_print_samples=False, label_smoothing=0.1, report_accuracy=False, ignore_prefix_size=0, adam_betas='(0.9, 0.98)', adam_eps=1e-08, weight_decay=0.0, use_old_adam=False, fp16_adam_stats=False, warmup_updates=4000, warmup_init_lr=-1, pad=1, eos=2, unk=3, encoder_ffn_embed_dim=8192, decoder_ffn_embed_dim=8192, dropout=0.2, attention_dropout=0.0, share_decoder_input_output_embed=True, no_seed_provided=False, encoder_embed_dim=1024, encoder_attention_heads=16, encoder_normalize_before=False, decoder_embed_dim=1024, decoder_attention_heads=16, encoder_embed_path=None, encoder_layers=6, encoder_learned_pos=False, decoder_embed_path=None, decoder_layers=6, decoder_normalize_before=False, decoder_learned_pos=False, activation_dropout=0.0, activation_fn='relu', adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, share_all_embeddings=False, merge_src_tgt_embed=False, no_token_positional_embeddings=False, adaptive_input=False, no_cross_attention=False, cross_self_attention=False, decoder_output_dim=1024, decoder_input_dim=1024, no_scale_embedding=False, layernorm_embedding=False, tie_adaptive_weights=False, checkpoint_activations=False, offload_activations=False, encoder_layers_to_keep=None, decoder_layers_to_keep=None, encoder_layerdrop=0, decoder_layerdrop=0, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, _name='transformer_vaswani_wmt_en_de_big'), 'task': {'_name': 'translation', 'data': 'binarized/en-ja/', 'source_lang': None, 'target_lang': None, 'load_alignments': False, 'left_pad_source': True, 'left_pad_target': False, 'max_source_positions': 1024, 'max_target_positions': 1024, 'upsample_primary': -1, 'truncate_source': False, 'num_batch_buckets': 0, 'train_subset': 'train', 'dataset_impl': None, 'required_seq_len_multiple': 1, 'eval_bleu': False, 'eval_bleu_args': '{}', 'eval_bleu_detok': 'space', 'eval_bleu_detok_args': '{}', 'eval_tokenized_bleu': False, 'eval_bleu_remove_bpe': None, 'eval_bleu_print_samples': False}, 'criterion': {'_name': 'label_smoothed_cross_entropy', 'label_smoothing': 0.1, 'report_accuracy': False, 'ignore_prefix_size': 0, 'sentence_avg': False}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9, 0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'fp16_adam_stats': False, 'tpu': False, 'lr': [0.001]}, 'lr_scheduler': {'_name': 'inverse_sqrt', 'warmup_updates': 4000, 'warmup_init_lr': -1.0, 'lr': [0.001]}, 'scoring': {'_name': 'bleu', 'pad': 1, 'eos': 2, 'unk': 3}, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}} +TransformerModel( + (encoder): TransformerEncoderBase( + (dropout_module): FairseqDropout() + (embed_tokens): Embedding(16000, 1024, padding_idx=1) + (embed_positions): SinusoidalPositionalEmbedding() + (layers): ModuleList( + (0-5): 6 x TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=1024, out_features=1024, bias=True) + (v_proj): Linear(in_features=1024, out_features=1024, bias=True) + (q_proj): Linear(in_features=1024, out_features=1024, bias=True) + (out_proj): Linear(in_features=1024, out_features=1024, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=1024, out_features=8192, bias=True) + (fc2): Linear(in_features=8192, out_features=1024, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (decoder): TransformerDecoderBase( + (dropout_module): FairseqDropout() + (embed_tokens): Embedding(32000, 1024, padding_idx=1) + (embed_positions): SinusoidalPositionalEmbedding() + (layers): ModuleList( + (0-5): 6 x TransformerDecoderLayerBase( + (dropout_module): FairseqDropout() + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=1024, out_features=1024, bias=True) + (v_proj): Linear(in_features=1024, out_features=1024, bias=True) + (q_proj): Linear(in_features=1024, out_features=1024, bias=True) + (out_proj): Linear(in_features=1024, out_features=1024, bias=True) + ) + (activation_dropout_module): FairseqDropout() + (self_attn_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) + (encoder_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=1024, out_features=1024, bias=True) + (v_proj): Linear(in_features=1024, out_features=1024, bias=True) + (q_proj): Linear(in_features=1024, out_features=1024, bias=True) + (out_proj): Linear(in_features=1024, out_features=1024, bias=True) + ) + (encoder_attn_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) + (fc1): Linear(in_features=1024, out_features=8192, bias=True) + (fc2): Linear(in_features=8192, out_features=1024, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) + ) + ) + (output_projection): Linear(in_features=1024, out_features=32000, bias=False) + ) +) +task: TranslationTask +model: TransformerModel +criterion: LabelSmoothedCrossEntropyCriterion +num. shared model params: 326,221,824 (num. trained: 326,221,824) +num. expert model params: 0 (num. trained: 0) +training on 8 devices (GPUs/TPUs) +max tokens per device = 16384 and max sentences per device = None +begin dry-run validation on "valid" subset +Start iterating over samples +epoch 001: 102 / 1689 loss=13.088, nll_loss=12.777, ppl=7016.72, wps=451988, ups=1.04, wpb=435192, bsz=16670.6, num_updates=100, lr=2.5e-05, gnorm=3.276, clip=92, loss_scale=2, train_wall=101, gb_free=21.7, wall=116 +epoch 001: 202 / 1689 loss=11.701, nll_loss=11.205, ppl=2359.99, wps=461426, ups=1.06, wpb=435200, bsz=16988, num_updates=200, lr=5e-05, gnorm=1.941, clip=91, loss_scale=2, train_wall=94, gb_free=19.7, wall=211 +epoch 001: 302 / 1689 loss=11.144, nll_loss=10.545, ppl=1493.94, wps=459737, ups=1.06, wpb=434886, bsz=16709.5, num_updates=300, lr=7.5e-05, gnorm=1.667, clip=96, loss_scale=2, train_wall=94, gb_free=18.9, wall=305 +epoch 001: 402 / 1689 loss=10.478, nll_loss=9.753, ppl=862.82, wps=455965, ups=1.05, wpb=432640, bsz=16420.4, num_updates=400, lr=0.0001, gnorm=1.652, clip=99, loss_scale=2, train_wall=94, gb_free=16.6, wall=400 +epoch 001: 502 / 1689 loss=9.926, nll_loss=9.093, ppl=545.94, wps=459261, ups=1.06, wpb=433247, bsz=16495.6, num_updates=500, lr=0.000125, gnorm=1.522, clip=97, loss_scale=2, train_wall=94, gb_free=19.3, wall=495 +epoch 001: 602 / 1689 loss=9.49, nll_loss=8.574, ppl=381.05, wps=461360, ups=1.07, wpb=433091, bsz=16520.5, num_updates=600, lr=0.00015, gnorm=1.427, clip=96, loss_scale=2, train_wall=93, gb_free=18.7, wall=588 +epoch 001: 702 / 1689 loss=9.093, nll_loss=8.106, ppl=275.6, wps=460599, ups=1.06, wpb=434503, bsz=16405.3, num_updates=700, lr=0.000175, gnorm=1.276, clip=95, loss_scale=4, train_wall=93, gb_free=19.4, wall=683 +epoch 001: 802 / 1689 loss=8.708, nll_loss=7.656, ppl=201.72, wps=461109, ups=1.06, wpb=434941, bsz=16333.4, num_updates=800, lr=0.0002, gnorm=1.151, clip=83, loss_scale=4, train_wall=93, gb_free=21.3, wall=777 +epoch 001: 902 / 1689 loss=8.339, nll_loss=7.228, ppl=149.92, wps=461139, ups=1.06, wpb=434996, bsz=16571.2, num_updates=900, lr=0.000225, gnorm=1.087, clip=68, loss_scale=4, train_wall=93, gb_free=18.8, wall=871 +epoch 001: 1002 / 1689 loss=7.995, nll_loss=6.829, ppl=113.7, wps=453700, ups=1.05, wpb=431938, bsz=16755.5, num_updates=1000, lr=0.00025, gnorm=1.012, clip=45, loss_scale=4, train_wall=94, gb_free=19.7, wall=967 +begin validation on "valid" subset +epoch 001 | valid on 'valid' subset | loss 7.71 | nll_loss 6.438 | ppl 86.73 | wps 0 | wpb 42662 | bsz 2032 | num_updates 1000 +epoch 001: 1102 / 1689 loss=7.645, nll_loss=6.424, ppl=85.84, wps=385841, ups=0.9, wpb=430204, bsz=16536.8, num_updates=1100, lr=0.000275, gnorm=1.003, clip=43, loss_scale=4, train_wall=94, gb_free=18.6, wall=1078 +epoch 001: 1202 / 1689 loss=7.32, nll_loss=6.048, ppl=66.17, wps=458160, ups=1.06, wpb=433198, bsz=16274.6, num_updates=1200, lr=0.0003, gnorm=0.965, clip=33, loss_scale=8, train_wall=93, gb_free=20.5, wall=1173 +epoch 001: 1302 / 1689 loss=6.964, nll_loss=5.639, ppl=49.82, wps=455958, ups=1.05, wpb=432602, bsz=16372.2, num_updates=1300, lr=0.000325, gnorm=0.932, clip=29, loss_scale=8, train_wall=94, gb_free=20.6, wall=1268 +epoch 001: 1402 / 1689 loss=6.67, nll_loss=5.301, ppl=39.43, wps=459164, ups=1.06, wpb=434746, bsz=16342.9, num_updates=1400, lr=0.00035, gnorm=0.859, clip=23, loss_scale=8, train_wall=93, gb_free=19.1, wall=1362 +epoch 001: 1502 / 1689 loss=6.4, nll_loss=4.995, ppl=31.89, wps=462813, ups=1.06, wpb=435243, bsz=16512.3, num_updates=1500, lr=0.000375, gnorm=0.776, clip=12, loss_scale=8, train_wall=93, gb_free=18.6, wall=1456 +epoch 001: 1602 / 1689 loss=6.196, nll_loss=4.765, ppl=27.2, wps=459759, ups=1.06, wpb=433941, bsz=16442.2, num_updates=1600, lr=0.0004, gnorm=0.721, clip=5, loss_scale=8, train_wall=93, gb_free=19.2, wall=1551 +end of epoch 1 (average epoch stats below) +epoch 001 | loss 8.683 | nll_loss 7.647 | ppl 200.5 | wps 453390 | ups 1.05 | wpb 433522 | bsz 16506.4 | num_updates 1686 | lr 0.0004215 | gnorm 1.295 | clip 60 | loss_scale 8 | train_wall 1582 | gb_free 21.1 | wall 1632 +Start iterating over samples +epoch 002: 14 / 1689 loss=6.022, nll_loss=4.57, ppl=23.75, wps=449035, ups=1.05, wpb=428985, bsz=16298.6, num_updates=1700, lr=0.000425, gnorm=0.674, clip=5, loss_scale=8, train_wall=94, gb_free=20, wall=1646 +epoch 002: 14 / 1689 loss=6.022, nll_loss=4.57, ppl=23.75, wps=449035, ups=1.05, wpb=428985, bsz=16298.6, num_updates=1700, lr=0.000425, gnorm=0.674, clip=5, loss_scale=8, train_wall=94, gb_free=20, wall=1646 +epoch 002: 114 / 1689 loss=5.876, nll_loss=4.407, ppl=21.21, wps=457961, ups=1.06, wpb=433815, bsz=16765.6, num_updates=1800, lr=0.00045, gnorm=0.629, clip=3, loss_scale=8, train_wall=93, gb_free=19.9, wall=1741 +epoch 002: 114 / 1689 loss=5.876, nll_loss=4.407, ppl=21.21, wps=457961, ups=1.06, wpb=433815, bsz=16765.6, num_updates=1800, lr=0.00045, gnorm=0.629, clip=3, loss_scale=8, train_wall=93, gb_free=19.9, wall=1741 +epoch 002: 215 / 1689 loss=5.766, nll_loss=4.285, ppl=19.49, wps=447663, ups=1.03, wpb=434331, bsz=16483.4, num_updates=1900, lr=0.000475, gnorm=0.581, clip=1, loss_scale=4, train_wall=95, gb_free=20.3, wall=1838 +epoch 002: 215 / 1689 loss=5.766, nll_loss=4.285, ppl=19.49, wps=447663, ups=1.03, wpb=434331, bsz=16483.4, num_updates=1900, lr=0.000475, gnorm=0.581, clip=1, loss_scale=4, train_wall=95, gb_free=20.3, wall=1838 +epoch 002: 315 / 1689 loss=5.644, nll_loss=4.15, ppl=17.76, wps=459105, ups=1.06, wpb=434889, bsz=16749.4, num_updates=2000, lr=0.0005, gnorm=0.549, clip=1, loss_scale=4, train_wall=93, gb_free=19.2, wall=1933 +epoch 002: 315 / 1689 loss=5.644, nll_loss=4.15, ppl=17.76, wps=459105, ups=1.06, wpb=434889, bsz=16749.4, num_updates=2000, lr=0.0005, gnorm=0.549, clip=1, loss_scale=4, train_wall=93, gb_free=19.2, wall=1933 +begin validation on "valid" subset +epoch 002 | valid on 'valid' subset | loss 5.473 | nll_loss 3.88 | ppl 14.72 | wps 0 | wpb 42662 | bsz 2032 | num_updates 2000 | best_loss 5.473 +epoch 002 | valid on 'valid' subset | loss 5.473 | nll_loss 3.88 | ppl 14.72 | wps 0 | wpb 42662 | bsz 2032 | num_updates 2000 | best_loss 5.473 +epoch 002: 415 / 1689 loss=5.548, nll_loss=4.045, ppl=16.5, wps=384688, ups=0.89, wpb=432563, bsz=16241.9, num_updates=2100, lr=0.000525, gnorm=0.522, clip=0, loss_scale=4, train_wall=93, gb_free=18.9, wall=2045 +epoch 002: 415 / 1689 loss=5.548, nll_loss=4.045, ppl=16.5, wps=384688, ups=0.89, wpb=432563, bsz=16241.9, num_updates=2100, lr=0.000525, gnorm=0.522, clip=0, loss_scale=4, train_wall=93, gb_free=18.9, wall=2045 +epoch 002: 515 / 1689 loss=5.473, nll_loss=3.963, ppl=15.59, wps=463560, ups=1.07, wpb=435041, bsz=16516.2, num_updates=2200, lr=0.00055, gnorm=0.506, clip=0, loss_scale=4, train_wall=93, gb_free=19.5, wall=2139 +epoch 002: 515 / 1689 loss=5.473, nll_loss=3.963, ppl=15.59, wps=463560, ups=1.07, wpb=435041, bsz=16516.2, num_updates=2200, lr=0.00055, gnorm=0.506, clip=0, loss_scale=4, train_wall=93, gb_free=19.5, wall=2139 +epoch 002: 616 / 1689 loss=5.379, nll_loss=3.861, ppl=14.53, wps=454082, ups=1.05, wpb=432524, bsz=16610.3, num_updates=2300, lr=0.000575, gnorm=0.46, clip=0, loss_scale=2, train_wall=94, gb_free=20, wall=2234 +epoch 002: 616 / 1689 loss=5.379, nll_loss=3.861, ppl=14.53, wps=454082, ups=1.05, wpb=432524, bsz=16610.3, num_updates=2300, lr=0.000575, gnorm=0.46, clip=0, loss_scale=2, train_wall=94, gb_free=20, wall=2234 +epoch 002: 716 / 1689 loss=5.315, nll_loss=3.79, ppl=13.83, wps=460907, ups=1.06, wpb=434305, bsz=16174.7, num_updates=2400, lr=0.0006, gnorm=0.476, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=2328 +epoch 002: 716 / 1689 loss=5.315, nll_loss=3.79, ppl=13.83, wps=460907, ups=1.06, wpb=434305, bsz=16174.7, num_updates=2400, lr=0.0006, gnorm=0.476, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=2328 +epoch 002: 816 / 1689 loss=5.249, nll_loss=3.719, ppl=13.17, wps=456744, ups=1.06, wpb=431438, bsz=16804.4, num_updates=2500, lr=0.000625, gnorm=0.448, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=2423 +epoch 002: 816 / 1689 loss=5.249, nll_loss=3.719, ppl=13.17, wps=456744, ups=1.06, wpb=431438, bsz=16804.4, num_updates=2500, lr=0.000625, gnorm=0.448, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=2423 +epoch 002: 916 / 1689 loss=5.202, nll_loss=3.668, ppl=12.71, wps=458949, ups=1.06, wpb=434226, bsz=16532.2, num_updates=2600, lr=0.00065, gnorm=0.442, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=2518 +epoch 002: 916 / 1689 loss=5.202, nll_loss=3.668, ppl=12.71, wps=458949, ups=1.06, wpb=434226, bsz=16532.2, num_updates=2600, lr=0.00065, gnorm=0.442, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=2518 +epoch 002: 1016 / 1689 loss=5.146, nll_loss=3.608, ppl=12.19, wps=459611, ups=1.06, wpb=432834, bsz=16389.8, num_updates=2700, lr=0.000675, gnorm=0.447, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=2612 +epoch 002: 1016 / 1689 loss=5.146, nll_loss=3.608, ppl=12.19, wps=459611, ups=1.06, wpb=432834, bsz=16389.8, num_updates=2700, lr=0.000675, gnorm=0.447, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=2612 +epoch 002: 1116 / 1689 loss=5.111, nll_loss=3.57, ppl=11.88, wps=455923, ups=1.05, wpb=434616, bsz=16381.1, num_updates=2800, lr=0.0007, gnorm=0.398, clip=0, loss_scale=4, train_wall=93, gb_free=21.7, wall=2707 +epoch 002: 1116 / 1689 loss=5.111, nll_loss=3.57, ppl=11.88, wps=455923, ups=1.05, wpb=434616, bsz=16381.1, num_updates=2800, lr=0.0007, gnorm=0.398, clip=0, loss_scale=4, train_wall=93, gb_free=21.7, wall=2707 +epoch 002: 1216 / 1689 loss=5.061, nll_loss=3.516, ppl=11.44, wps=458143, ups=1.05, wpb=434473, bsz=16578.2, num_updates=2900, lr=0.000725, gnorm=0.425, clip=0, loss_scale=4, train_wall=93, gb_free=18.9, wall=2802 +epoch 002: 1216 / 1689 loss=5.061, nll_loss=3.516, ppl=11.44, wps=458143, ups=1.05, wpb=434473, bsz=16578.2, num_updates=2900, lr=0.000725, gnorm=0.425, clip=0, loss_scale=4, train_wall=93, gb_free=18.9, wall=2802 +epoch 002: 1316 / 1689 loss=5.025, nll_loss=3.477, ppl=11.13, wps=457043, ups=1.06, wpb=432917, bsz=16800.8, num_updates=3000, lr=0.00075, gnorm=0.414, clip=0, loss_scale=4, train_wall=93, gb_free=18.8, wall=2897 +epoch 002: 1316 / 1689 loss=5.025, nll_loss=3.477, ppl=11.13, wps=457043, ups=1.06, wpb=432917, bsz=16800.8, num_updates=3000, lr=0.00075, gnorm=0.414, clip=0, loss_scale=4, train_wall=93, gb_free=18.8, wall=2897 +begin validation on "valid" subset +epoch 002 | valid on 'valid' subset | loss 4.943 | nll_loss 3.328 | ppl 10.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 3000 | best_loss 4.943 +epoch 002 | valid on 'valid' subset | loss 4.943 | nll_loss 3.328 | ppl 10.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 3000 | best_loss 4.943 +epoch 002: 1416 / 1689 loss=4.993, nll_loss=3.442, ppl=10.87, wps=384822, ups=0.89, wpb=433486, bsz=16253.3, num_updates=3100, lr=0.000775, gnorm=0.417, clip=0, loss_scale=4, train_wall=92, gb_free=19.5, wall=3009 +epoch 002: 1416 / 1689 loss=4.993, nll_loss=3.442, ppl=10.87, wps=384822, ups=0.89, wpb=433486, bsz=16253.3, num_updates=3100, lr=0.000775, gnorm=0.417, clip=0, loss_scale=4, train_wall=92, gb_free=19.5, wall=3009 +epoch 002: 1516 / 1689 loss=4.973, nll_loss=3.423, ppl=10.72, wps=455388, ups=1.05, wpb=433644, bsz=16473, num_updates=3200, lr=0.0008, gnorm=0.4, clip=0, loss_scale=4, train_wall=93, gb_free=19.1, wall=3104 +epoch 002: 1516 / 1689 loss=4.973, nll_loss=3.423, ppl=10.72, wps=455388, ups=1.05, wpb=433644, bsz=16473, num_updates=3200, lr=0.0008, gnorm=0.4, clip=0, loss_scale=4, train_wall=93, gb_free=19.1, wall=3104 +epoch 002: 1618 / 1689 loss=4.93, nll_loss=3.375, ppl=10.37, wps=445799, ups=1.03, wpb=433978, bsz=16305.5, num_updates=3300, lr=0.000825, gnorm=0.408, clip=0, loss_scale=2, train_wall=95, gb_free=19.7, wall=3202 +epoch 002: 1618 / 1689 loss=4.93, nll_loss=3.375, ppl=10.37, wps=445799, ups=1.03, wpb=433978, bsz=16305.5, num_updates=3300, lr=0.000825, gnorm=0.408, clip=0, loss_scale=2, train_wall=95, gb_free=19.7, wall=3202 +end of epoch 2 (average epoch stats below) +epoch 002 | loss 5.284 | nll_loss 3.758 | ppl 13.53 | wps 446342 | ups 1.03 | wpb 433534 | bsz 16498.9 | num_updates 3371 | lr 0.00084275 | gnorm 0.469 | clip 0.4 | loss_scale 2 | train_wall 1571 | gb_free 20.5 | wall 3269 +epoch 002 | loss 5.284 | nll_loss 3.758 | ppl 13.53 | wps 446342 | ups 1.03 | wpb 433534 | bsz 16498.9 | num_updates 3371 | lr 0.00084275 | gnorm 0.469 | clip 0.4 | loss_scale 2 | train_wall 1571 | gb_free 20.5 | wall 3269 +Start iterating over samples +epoch 003: 29 / 1689 loss=4.926, nll_loss=3.372, ppl=10.35, wps=452612, ups=1.05, wpb=430909, bsz=16420.6, num_updates=3400, lr=0.00085, gnorm=0.433, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3297 +epoch 003: 29 / 1689 loss=4.926, nll_loss=3.372, ppl=10.35, wps=452612, ups=1.05, wpb=430909, bsz=16420.6, num_updates=3400, lr=0.00085, gnorm=0.433, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3297 +epoch 003: 29 / 1689 loss=4.926, nll_loss=3.372, ppl=10.35, wps=452612, ups=1.05, wpb=430909, bsz=16420.6, num_updates=3400, lr=0.00085, gnorm=0.433, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3297 +epoch 003: 129 / 1689 loss=4.868, nll_loss=3.307, ppl=9.9, wps=455748, ups=1.05, wpb=434017, bsz=16771.7, num_updates=3500, lr=0.000875, gnorm=0.376, clip=0, loss_scale=2, train_wall=94, gb_free=18.4, wall=3392 +epoch 003: 129 / 1689 loss=4.868, nll_loss=3.307, ppl=9.9, wps=455748, ups=1.05, wpb=434017, bsz=16771.7, num_updates=3500, lr=0.000875, gnorm=0.376, clip=0, loss_scale=2, train_wall=94, gb_free=18.4, wall=3392 +epoch 003: 129 / 1689 loss=4.868, nll_loss=3.307, ppl=9.9, wps=455748, ups=1.05, wpb=434017, bsz=16771.7, num_updates=3500, lr=0.000875, gnorm=0.376, clip=0, loss_scale=2, train_wall=94, gb_free=18.4, wall=3392 +epoch 003: 229 / 1689 loss=4.856, nll_loss=3.296, ppl=9.82, wps=462038, ups=1.06, wpb=433922, bsz=16906.6, num_updates=3600, lr=0.0009, gnorm=0.4, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=3486 +epoch 003: 229 / 1689 loss=4.856, nll_loss=3.296, ppl=9.82, wps=462038, ups=1.06, wpb=433922, bsz=16906.6, num_updates=3600, lr=0.0009, gnorm=0.4, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=3486 +epoch 003: 229 / 1689 loss=4.856, nll_loss=3.296, ppl=9.82, wps=462038, ups=1.06, wpb=433922, bsz=16906.6, num_updates=3600, lr=0.0009, gnorm=0.4, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=3486 +epoch 003: 329 / 1689 loss=4.836, nll_loss=3.274, ppl=9.67, wps=455820, ups=1.06, wpb=431866, bsz=16563.3, num_updates=3700, lr=0.000925, gnorm=0.407, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3581 +epoch 003: 329 / 1689 loss=4.836, nll_loss=3.274, ppl=9.67, wps=455820, ups=1.06, wpb=431866, bsz=16563.3, num_updates=3700, lr=0.000925, gnorm=0.407, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3581 +epoch 003: 329 / 1689 loss=4.836, nll_loss=3.274, ppl=9.67, wps=455820, ups=1.06, wpb=431866, bsz=16563.3, num_updates=3700, lr=0.000925, gnorm=0.407, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3581 +epoch 003: 430 / 1689 loss=4.835, nll_loss=3.273, ppl=9.67, wps=457424, ups=1.06, wpb=433252, bsz=16402, num_updates=3800, lr=0.00095, gnorm=0.406, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=3676 +epoch 003: 430 / 1689 loss=4.835, nll_loss=3.273, ppl=9.67, wps=457424, ups=1.06, wpb=433252, bsz=16402, num_updates=3800, lr=0.00095, gnorm=0.406, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=3676 +epoch 003: 430 / 1689 loss=4.835, nll_loss=3.273, ppl=9.67, wps=457424, ups=1.06, wpb=433252, bsz=16402, num_updates=3800, lr=0.00095, gnorm=0.406, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=3676 +epoch 003: 530 / 1689 loss=4.822, nll_loss=3.26, ppl=9.58, wps=457659, ups=1.05, wpb=434716, bsz=16543.6, num_updates=3900, lr=0.000975, gnorm=0.415, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3771 +epoch 003: 530 / 1689 loss=4.822, nll_loss=3.26, ppl=9.58, wps=457659, ups=1.05, wpb=434716, bsz=16543.6, num_updates=3900, lr=0.000975, gnorm=0.415, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3771 +epoch 003: 530 / 1689 loss=4.822, nll_loss=3.26, ppl=9.58, wps=457659, ups=1.05, wpb=434716, bsz=16543.6, num_updates=3900, lr=0.000975, gnorm=0.415, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=3771 +epoch 003: 630 / 1689 loss=4.808, nll_loss=3.245, ppl=9.48, wps=460219, ups=1.06, wpb=434272, bsz=16474.6, num_updates=4000, lr=0.001, gnorm=0.41, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=3865 +epoch 003: 630 / 1689 loss=4.808, nll_loss=3.245, ppl=9.48, wps=460219, ups=1.06, wpb=434272, bsz=16474.6, num_updates=4000, lr=0.001, gnorm=0.41, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=3865 +epoch 003: 630 / 1689 loss=4.808, nll_loss=3.245, ppl=9.48, wps=460219, ups=1.06, wpb=434272, bsz=16474.6, num_updates=4000, lr=0.001, gnorm=0.41, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=3865 +begin validation on "valid" subset +epoch 003 | valid on 'valid' subset | loss 4.742 | nll_loss 3.111 | ppl 8.64 | wps 0 | wpb 42662 | bsz 2032 | num_updates 4000 | best_loss 4.742 +epoch 003 | valid on 'valid' subset | loss 4.742 | nll_loss 3.111 | ppl 8.64 | wps 0 | wpb 42662 | bsz 2032 | num_updates 4000 | best_loss 4.742 +epoch 003 | valid on 'valid' subset | loss 4.742 | nll_loss 3.111 | ppl 8.64 | wps 0 | wpb 42662 | bsz 2032 | num_updates 4000 | best_loss 4.742 +epoch 003: 730 / 1689 loss=4.808, nll_loss=3.245, ppl=9.48, wps=383918, ups=0.88, wpb=434443, bsz=16383.4, num_updates=4100, lr=0.00098773, gnorm=0.407, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=3978 +epoch 003: 730 / 1689 loss=4.808, nll_loss=3.245, ppl=9.48, wps=383918, ups=0.88, wpb=434443, bsz=16383.4, num_updates=4100, lr=0.00098773, gnorm=0.407, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=3978 +epoch 003: 730 / 1689 loss=4.808, nll_loss=3.245, ppl=9.48, wps=383918, ups=0.88, wpb=434443, bsz=16383.4, num_updates=4100, lr=0.00098773, gnorm=0.407, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=3978 +epoch 003: 830 / 1689 loss=4.776, nll_loss=3.211, ppl=9.26, wps=458877, ups=1.06, wpb=432929, bsz=16190.4, num_updates=4200, lr=0.0009759, gnorm=0.395, clip=0, loss_scale=2, train_wall=93, gb_free=21.7, wall=4073 +epoch 003: 830 / 1689 loss=4.776, nll_loss=3.211, ppl=9.26, wps=458877, ups=1.06, wpb=432929, bsz=16190.4, num_updates=4200, lr=0.0009759, gnorm=0.395, clip=0, loss_scale=2, train_wall=93, gb_free=21.7, wall=4073 +epoch 003: 830 / 1689 loss=4.776, nll_loss=3.211, ppl=9.26, wps=458877, ups=1.06, wpb=432929, bsz=16190.4, num_updates=4200, lr=0.0009759, gnorm=0.395, clip=0, loss_scale=2, train_wall=93, gb_free=21.7, wall=4073 +epoch 003: 930 / 1689 loss=4.759, nll_loss=3.193, ppl=9.14, wps=462086, ups=1.07, wpb=433053, bsz=16397.6, num_updates=4300, lr=0.000964486, gnorm=0.404, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=4166 +epoch 003: 930 / 1689 loss=4.759, nll_loss=3.193, ppl=9.14, wps=462086, ups=1.07, wpb=433053, bsz=16397.6, num_updates=4300, lr=0.000964486, gnorm=0.404, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=4166 +epoch 003: 930 / 1689 loss=4.759, nll_loss=3.193, ppl=9.14, wps=462086, ups=1.07, wpb=433053, bsz=16397.6, num_updates=4300, lr=0.000964486, gnorm=0.404, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=4166 +epoch 003: 1030 / 1689 loss=4.756, nll_loss=3.19, ppl=9.13, wps=462615, ups=1.07, wpb=434331, bsz=16308.9, num_updates=4400, lr=0.000953463, gnorm=0.378, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=4260 +epoch 003: 1030 / 1689 loss=4.756, nll_loss=3.19, ppl=9.13, wps=462615, ups=1.07, wpb=434331, bsz=16308.9, num_updates=4400, lr=0.000953463, gnorm=0.378, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=4260 +epoch 003: 1030 / 1689 loss=4.756, nll_loss=3.19, ppl=9.13, wps=462615, ups=1.07, wpb=434331, bsz=16308.9, num_updates=4400, lr=0.000953463, gnorm=0.378, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=4260 +epoch 003: 1130 / 1689 loss=4.742, nll_loss=3.176, ppl=9.04, wps=457537, ups=1.05, wpb=437633, bsz=16972.6, num_updates=4500, lr=0.000942809, gnorm=0.388, clip=0, loss_scale=4, train_wall=94, gb_free=18.9, wall=4356 +epoch 003: 1130 / 1689 loss=4.742, nll_loss=3.176, ppl=9.04, wps=457537, ups=1.05, wpb=437633, bsz=16972.6, num_updates=4500, lr=0.000942809, gnorm=0.388, clip=0, loss_scale=4, train_wall=94, gb_free=18.9, wall=4356 +epoch 003: 1130 / 1689 loss=4.742, nll_loss=3.176, ppl=9.04, wps=457537, ups=1.05, wpb=437633, bsz=16972.6, num_updates=4500, lr=0.000942809, gnorm=0.388, clip=0, loss_scale=4, train_wall=94, gb_free=18.9, wall=4356 +epoch 003: 1231 / 1689 loss=4.723, nll_loss=3.154, ppl=8.9, wps=455497, ups=1.05, wpb=433553, bsz=16335.3, num_updates=4600, lr=0.000932505, gnorm=0.384, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=4451 +epoch 003: 1231 / 1689 loss=4.723, nll_loss=3.154, ppl=8.9, wps=455497, ups=1.05, wpb=433553, bsz=16335.3, num_updates=4600, lr=0.000932505, gnorm=0.384, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=4451 +epoch 003: 1231 / 1689 loss=4.723, nll_loss=3.154, ppl=8.9, wps=455497, ups=1.05, wpb=433553, bsz=16335.3, num_updates=4600, lr=0.000932505, gnorm=0.384, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=4451 +epoch 003: 1331 / 1689 loss=4.698, nll_loss=3.127, ppl=8.74, wps=459622, ups=1.06, wpb=434179, bsz=16378.6, num_updates=4700, lr=0.000922531, gnorm=0.375, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=4545 +epoch 003: 1331 / 1689 loss=4.698, nll_loss=3.127, ppl=8.74, wps=459622, ups=1.06, wpb=434179, bsz=16378.6, num_updates=4700, lr=0.000922531, gnorm=0.375, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=4545 +epoch 003: 1331 / 1689 loss=4.698, nll_loss=3.127, ppl=8.74, wps=459622, ups=1.06, wpb=434179, bsz=16378.6, num_updates=4700, lr=0.000922531, gnorm=0.375, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=4545 +epoch 003: 1431 / 1689 loss=4.699, nll_loss=3.129, ppl=8.75, wps=460440, ups=1.06, wpb=433420, bsz=16608.6, num_updates=4800, lr=0.000912871, gnorm=0.373, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=4640 +epoch 003: 1431 / 1689 loss=4.699, nll_loss=3.129, ppl=8.75, wps=460440, ups=1.06, wpb=433420, bsz=16608.6, num_updates=4800, lr=0.000912871, gnorm=0.373, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=4640 +epoch 003: 1431 / 1689 loss=4.699, nll_loss=3.129, ppl=8.75, wps=460440, ups=1.06, wpb=433420, bsz=16608.6, num_updates=4800, lr=0.000912871, gnorm=0.373, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=4640 +epoch 003: 1531 / 1689 loss=4.671, nll_loss=3.098, ppl=8.56, wps=455364, ups=1.05, wpb=432104, bsz=16527.4, num_updates=4900, lr=0.000903508, gnorm=0.364, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=4734 +epoch 003: 1531 / 1689 loss=4.671, nll_loss=3.098, ppl=8.56, wps=455364, ups=1.05, wpb=432104, bsz=16527.4, num_updates=4900, lr=0.000903508, gnorm=0.364, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=4734 +epoch 003: 1531 / 1689 loss=4.671, nll_loss=3.098, ppl=8.56, wps=455364, ups=1.05, wpb=432104, bsz=16527.4, num_updates=4900, lr=0.000903508, gnorm=0.364, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=4734 +epoch 003: 1631 / 1689 loss=4.665, nll_loss=3.092, ppl=8.53, wps=458872, ups=1.06, wpb=431565, bsz=16155.2, num_updates=5000, lr=0.000894427, gnorm=0.38, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=4829 +epoch 003: 1631 / 1689 loss=4.665, nll_loss=3.092, ppl=8.53, wps=458872, ups=1.06, wpb=431565, bsz=16155.2, num_updates=5000, lr=0.000894427, gnorm=0.38, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=4829 +epoch 003: 1631 / 1689 loss=4.665, nll_loss=3.092, ppl=8.53, wps=458872, ups=1.06, wpb=431565, bsz=16155.2, num_updates=5000, lr=0.000894427, gnorm=0.38, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=4829 +begin validation on "valid" subset +epoch 003 | valid on 'valid' subset | loss 4.578 | nll_loss 2.96 | ppl 7.78 | wps 0 | wpb 42662 | bsz 2032 | num_updates 5000 | best_loss 4.578 +epoch 003 | valid on 'valid' subset | loss 4.578 | nll_loss 2.96 | ppl 7.78 | wps 0 | wpb 42662 | bsz 2032 | num_updates 5000 | best_loss 4.578 +epoch 003 | valid on 'valid' subset | loss 4.578 | nll_loss 2.96 | ppl 7.78 | wps 0 | wpb 42662 | bsz 2032 | num_updates 5000 | best_loss 4.578 +end of epoch 3 (average epoch stats below) +epoch 003 | loss 4.769 | nll_loss 3.203 | ppl 9.21 | wps 447995 | ups 1.03 | wpb 433510 | bsz 16499.4 | num_updates 5058 | lr 0.000889284 | gnorm 0.392 | clip 0 | loss_scale 2 | train_wall 1568 | gb_free 19.6 | wall 4901 +epoch 003 | loss 4.769 | nll_loss 3.203 | ppl 9.21 | wps 447995 | ups 1.03 | wpb 433510 | bsz 16499.4 | num_updates 5058 | lr 0.000889284 | gnorm 0.392 | clip 0 | loss_scale 2 | train_wall 1568 | gb_free 19.6 | wall 4901 +epoch 003 | loss 4.769 | nll_loss 3.203 | ppl 9.21 | wps 447995 | ups 1.03 | wpb 433510 | bsz 16499.4 | num_updates 5058 | lr 0.000889284 | gnorm 0.392 | clip 0 | loss_scale 2 | train_wall 1568 | gb_free 19.6 | wall 4901 +Start iterating over samples +epoch 004: 42 / 1689 loss=4.639, nll_loss=3.062, ppl=8.35, wps=381664, ups=0.89, wpb=430679, bsz=16832.2, num_updates=5100, lr=0.000885615, gnorm=0.367, clip=0, loss_scale=4, train_wall=93, gb_free=19.1, wall=4941 +epoch 004: 42 / 1689 loss=4.639, nll_loss=3.062, ppl=8.35, wps=381664, ups=0.89, wpb=430679, bsz=16832.2, num_updates=5100, lr=0.000885615, gnorm=0.367, clip=0, loss_scale=4, train_wall=93, gb_free=19.1, wall=4941 +epoch 004: 42 / 1689 loss=4.639, nll_loss=3.062, ppl=8.35, wps=381664, ups=0.89, wpb=430679, bsz=16832.2, num_updates=5100, lr=0.000885615, gnorm=0.367, clip=0, loss_scale=4, train_wall=93, gb_free=19.1, wall=4941 +epoch 004: 42 / 1689 loss=4.639, nll_loss=3.062, ppl=8.35, wps=381664, ups=0.89, wpb=430679, bsz=16832.2, num_updates=5100, lr=0.000885615, gnorm=0.367, clip=0, loss_scale=4, train_wall=93, gb_free=19.1, wall=4941 +epoch 004: 142 / 1689 loss=4.613, nll_loss=3.034, ppl=8.19, wps=461103, ups=1.06, wpb=433420, bsz=16375.6, num_updates=5200, lr=0.000877058, gnorm=0.355, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=5035 +epoch 004: 142 / 1689 loss=4.613, nll_loss=3.034, ppl=8.19, wps=461103, ups=1.06, wpb=433420, bsz=16375.6, num_updates=5200, lr=0.000877058, gnorm=0.355, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=5035 +epoch 004: 142 / 1689 loss=4.613, nll_loss=3.034, ppl=8.19, wps=461103, ups=1.06, wpb=433420, bsz=16375.6, num_updates=5200, lr=0.000877058, gnorm=0.355, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=5035 +epoch 004: 142 / 1689 loss=4.613, nll_loss=3.034, ppl=8.19, wps=461103, ups=1.06, wpb=433420, bsz=16375.6, num_updates=5200, lr=0.000877058, gnorm=0.355, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=5035 +epoch 004: 243 / 1689 loss=4.61, nll_loss=3.03, ppl=8.17, wps=454146, ups=1.05, wpb=431922, bsz=16305.1, num_updates=5300, lr=0.000868744, gnorm=0.368, clip=0, loss_scale=2, train_wall=94, gb_free=19.6, wall=5130 +epoch 004: 243 / 1689 loss=4.61, nll_loss=3.03, ppl=8.17, wps=454146, ups=1.05, wpb=431922, bsz=16305.1, num_updates=5300, lr=0.000868744, gnorm=0.368, clip=0, loss_scale=2, train_wall=94, gb_free=19.6, wall=5130 +epoch 004: 243 / 1689 loss=4.61, nll_loss=3.03, ppl=8.17, wps=454146, ups=1.05, wpb=431922, bsz=16305.1, num_updates=5300, lr=0.000868744, gnorm=0.368, clip=0, loss_scale=2, train_wall=94, gb_free=19.6, wall=5130 +epoch 004: 243 / 1689 loss=4.61, nll_loss=3.03, ppl=8.17, wps=454146, ups=1.05, wpb=431922, bsz=16305.1, num_updates=5300, lr=0.000868744, gnorm=0.368, clip=0, loss_scale=2, train_wall=94, gb_free=19.6, wall=5130 +epoch 004: 343 / 1689 loss=4.61, nll_loss=3.031, ppl=8.17, wps=463767, ups=1.07, wpb=434595, bsz=16711.5, num_updates=5400, lr=0.000860663, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=5224 +epoch 004: 343 / 1689 loss=4.61, nll_loss=3.031, ppl=8.17, wps=463767, ups=1.07, wpb=434595, bsz=16711.5, num_updates=5400, lr=0.000860663, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=5224 +epoch 004: 343 / 1689 loss=4.61, nll_loss=3.031, ppl=8.17, wps=463767, ups=1.07, wpb=434595, bsz=16711.5, num_updates=5400, lr=0.000860663, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=5224 +epoch 004: 343 / 1689 loss=4.61, nll_loss=3.031, ppl=8.17, wps=463767, ups=1.07, wpb=434595, bsz=16711.5, num_updates=5400, lr=0.000860663, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=5224 +epoch 004: 443 / 1689 loss=4.601, nll_loss=3.021, ppl=8.12, wps=460710, ups=1.07, wpb=432435, bsz=16662.2, num_updates=5500, lr=0.000852803, gnorm=0.353, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=5318 +epoch 004: 443 / 1689 loss=4.601, nll_loss=3.021, ppl=8.12, wps=460710, ups=1.07, wpb=432435, bsz=16662.2, num_updates=5500, lr=0.000852803, gnorm=0.353, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=5318 +epoch 004: 443 / 1689 loss=4.601, nll_loss=3.021, ppl=8.12, wps=460710, ups=1.07, wpb=432435, bsz=16662.2, num_updates=5500, lr=0.000852803, gnorm=0.353, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=5318 +epoch 004: 443 / 1689 loss=4.601, nll_loss=3.021, ppl=8.12, wps=460710, ups=1.07, wpb=432435, bsz=16662.2, num_updates=5500, lr=0.000852803, gnorm=0.353, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=5318 +epoch 004: 543 / 1689 loss=4.599, nll_loss=3.02, ppl=8.11, wps=460938, ups=1.06, wpb=433622, bsz=16605.2, num_updates=5600, lr=0.000845154, gnorm=0.362, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=5412 +epoch 004: 543 / 1689 loss=4.599, nll_loss=3.02, ppl=8.11, wps=460938, ups=1.06, wpb=433622, bsz=16605.2, num_updates=5600, lr=0.000845154, gnorm=0.362, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=5412 +epoch 004: 543 / 1689 loss=4.599, nll_loss=3.02, ppl=8.11, wps=460938, ups=1.06, wpb=433622, bsz=16605.2, num_updates=5600, lr=0.000845154, gnorm=0.362, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=5412 +epoch 004: 543 / 1689 loss=4.599, nll_loss=3.02, ppl=8.11, wps=460938, ups=1.06, wpb=433622, bsz=16605.2, num_updates=5600, lr=0.000845154, gnorm=0.362, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=5412 +epoch 004: 643 / 1689 loss=4.585, nll_loss=3.005, ppl=8.03, wps=461255, ups=1.06, wpb=434504, bsz=16524.6, num_updates=5700, lr=0.000837708, gnorm=0.352, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=5506 +epoch 004: 643 / 1689 loss=4.585, nll_loss=3.005, ppl=8.03, wps=461255, ups=1.06, wpb=434504, bsz=16524.6, num_updates=5700, lr=0.000837708, gnorm=0.352, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=5506 +epoch 004: 643 / 1689 loss=4.585, nll_loss=3.005, ppl=8.03, wps=461255, ups=1.06, wpb=434504, bsz=16524.6, num_updates=5700, lr=0.000837708, gnorm=0.352, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=5506 +epoch 004: 643 / 1689 loss=4.585, nll_loss=3.005, ppl=8.03, wps=461255, ups=1.06, wpb=434504, bsz=16524.6, num_updates=5700, lr=0.000837708, gnorm=0.352, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=5506 +epoch 004: 743 / 1689 loss=4.582, nll_loss=3.001, ppl=8.01, wps=460970, ups=1.06, wpb=433836, bsz=16571.8, num_updates=5800, lr=0.000830455, gnorm=0.375, clip=0, loss_scale=4, train_wall=93, gb_free=18.6, wall=5600 +epoch 004: 743 / 1689 loss=4.582, nll_loss=3.001, ppl=8.01, wps=460970, ups=1.06, wpb=433836, bsz=16571.8, num_updates=5800, lr=0.000830455, gnorm=0.375, clip=0, loss_scale=4, train_wall=93, gb_free=18.6, wall=5600 +epoch 004: 743 / 1689 loss=4.582, nll_loss=3.001, ppl=8.01, wps=460970, ups=1.06, wpb=433836, bsz=16571.8, num_updates=5800, lr=0.000830455, gnorm=0.375, clip=0, loss_scale=4, train_wall=93, gb_free=18.6, wall=5600 +epoch 004: 743 / 1689 loss=4.582, nll_loss=3.001, ppl=8.01, wps=460970, ups=1.06, wpb=433836, bsz=16571.8, num_updates=5800, lr=0.000830455, gnorm=0.375, clip=0, loss_scale=4, train_wall=93, gb_free=18.6, wall=5600 +epoch 004: 843 / 1689 loss=4.57, nll_loss=2.988, ppl=7.93, wps=462052, ups=1.06, wpb=435037, bsz=16307.4, num_updates=5900, lr=0.000823387, gnorm=0.36, clip=0, loss_scale=4, train_wall=93, gb_free=19, wall=5695 +epoch 004: 843 / 1689 loss=4.57, nll_loss=2.988, ppl=7.93, wps=462052, ups=1.06, wpb=435037, bsz=16307.4, num_updates=5900, lr=0.000823387, gnorm=0.36, clip=0, loss_scale=4, train_wall=93, gb_free=19, wall=5695 +epoch 004: 843 / 1689 loss=4.57, nll_loss=2.988, ppl=7.93, wps=462052, ups=1.06, wpb=435037, bsz=16307.4, num_updates=5900, lr=0.000823387, gnorm=0.36, clip=0, loss_scale=4, train_wall=93, gb_free=19, wall=5695 +epoch 004: 843 / 1689 loss=4.57, nll_loss=2.988, ppl=7.93, wps=462052, ups=1.06, wpb=435037, bsz=16307.4, num_updates=5900, lr=0.000823387, gnorm=0.36, clip=0, loss_scale=4, train_wall=93, gb_free=19, wall=5695 +epoch 004: 944 / 1689 loss=4.56, nll_loss=2.977, ppl=7.88, wps=455640, ups=1.05, wpb=433738, bsz=16409.6, num_updates=6000, lr=0.000816497, gnorm=0.361, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=5790 +epoch 004: 944 / 1689 loss=4.56, nll_loss=2.977, ppl=7.88, wps=455640, ups=1.05, wpb=433738, bsz=16409.6, num_updates=6000, lr=0.000816497, gnorm=0.361, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=5790 +epoch 004: 944 / 1689 loss=4.56, nll_loss=2.977, ppl=7.88, wps=455640, ups=1.05, wpb=433738, bsz=16409.6, num_updates=6000, lr=0.000816497, gnorm=0.361, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=5790 +epoch 004: 944 / 1689 loss=4.56, nll_loss=2.977, ppl=7.88, wps=455640, ups=1.05, wpb=433738, bsz=16409.6, num_updates=6000, lr=0.000816497, gnorm=0.361, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=5790 +begin validation on "valid" subset +epoch 004 | valid on 'valid' subset | loss 4.526 | nll_loss 2.901 | ppl 7.47 | wps 0 | wpb 42662 | bsz 2032 | num_updates 6000 | best_loss 4.526 +epoch 004 | valid on 'valid' subset | loss 4.526 | nll_loss 2.901 | ppl 7.47 | wps 0 | wpb 42662 | bsz 2032 | num_updates 6000 | best_loss 4.526 +epoch 004 | valid on 'valid' subset | loss 4.526 | nll_loss 2.901 | ppl 7.47 | wps 0 | wpb 42662 | bsz 2032 | num_updates 6000 | best_loss 4.526 +epoch 004 | valid on 'valid' subset | loss 4.526 | nll_loss 2.901 | ppl 7.47 | wps 0 | wpb 42662 | bsz 2032 | num_updates 6000 | best_loss 4.526 +epoch 004: 1044 / 1689 loss=4.564, nll_loss=2.983, ppl=7.9, wps=379819, ups=0.88, wpb=434073, bsz=16698.2, num_updates=6100, lr=0.000809776, gnorm=0.347, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=5904 +epoch 004: 1044 / 1689 loss=4.564, nll_loss=2.983, ppl=7.9, wps=379819, ups=0.88, wpb=434073, bsz=16698.2, num_updates=6100, lr=0.000809776, gnorm=0.347, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=5904 +epoch 004: 1044 / 1689 loss=4.564, nll_loss=2.983, ppl=7.9, wps=379819, ups=0.88, wpb=434073, bsz=16698.2, num_updates=6100, lr=0.000809776, gnorm=0.347, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=5904 +epoch 004: 1044 / 1689 loss=4.564, nll_loss=2.983, ppl=7.9, wps=379819, ups=0.88, wpb=434073, bsz=16698.2, num_updates=6100, lr=0.000809776, gnorm=0.347, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=5904 +epoch 004: 1144 / 1689 loss=4.554, nll_loss=2.972, ppl=7.84, wps=461104, ups=1.06, wpb=434581, bsz=16651.3, num_updates=6200, lr=0.000803219, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=5998 +epoch 004: 1144 / 1689 loss=4.554, nll_loss=2.972, ppl=7.84, wps=461104, ups=1.06, wpb=434581, bsz=16651.3, num_updates=6200, lr=0.000803219, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=5998 +epoch 004: 1144 / 1689 loss=4.554, nll_loss=2.972, ppl=7.84, wps=461104, ups=1.06, wpb=434581, bsz=16651.3, num_updates=6200, lr=0.000803219, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=5998 +epoch 004: 1144 / 1689 loss=4.554, nll_loss=2.972, ppl=7.84, wps=461104, ups=1.06, wpb=434581, bsz=16651.3, num_updates=6200, lr=0.000803219, gnorm=0.354, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=5998 +epoch 004: 1244 / 1689 loss=4.548, nll_loss=2.965, ppl=7.81, wps=461784, ups=1.06, wpb=434843, bsz=16323.7, num_updates=6300, lr=0.000796819, gnorm=0.348, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=6092 +epoch 004: 1244 / 1689 loss=4.548, nll_loss=2.965, ppl=7.81, wps=461784, ups=1.06, wpb=434843, bsz=16323.7, num_updates=6300, lr=0.000796819, gnorm=0.348, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=6092 +epoch 004: 1244 / 1689 loss=4.548, nll_loss=2.965, ppl=7.81, wps=461784, ups=1.06, wpb=434843, bsz=16323.7, num_updates=6300, lr=0.000796819, gnorm=0.348, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=6092 +epoch 004: 1244 / 1689 loss=4.548, nll_loss=2.965, ppl=7.81, wps=461784, ups=1.06, wpb=434843, bsz=16323.7, num_updates=6300, lr=0.000796819, gnorm=0.348, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=6092 +epoch 004: 1344 / 1689 loss=4.542, nll_loss=2.958, ppl=7.77, wps=456825, ups=1.06, wpb=432538, bsz=16691.5, num_updates=6400, lr=0.000790569, gnorm=0.355, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=6187 +epoch 004: 1344 / 1689 loss=4.542, nll_loss=2.958, ppl=7.77, wps=456825, ups=1.06, wpb=432538, bsz=16691.5, num_updates=6400, lr=0.000790569, gnorm=0.355, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=6187 +epoch 004: 1344 / 1689 loss=4.542, nll_loss=2.958, ppl=7.77, wps=456825, ups=1.06, wpb=432538, bsz=16691.5, num_updates=6400, lr=0.000790569, gnorm=0.355, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=6187 +epoch 004: 1344 / 1689 loss=4.542, nll_loss=2.958, ppl=7.77, wps=456825, ups=1.06, wpb=432538, bsz=16691.5, num_updates=6400, lr=0.000790569, gnorm=0.355, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=6187 +epoch 004: 1444 / 1689 loss=4.535, nll_loss=2.951, ppl=7.73, wps=462165, ups=1.06, wpb=434479, bsz=16687.9, num_updates=6500, lr=0.000784465, gnorm=0.357, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=6281 +epoch 004: 1444 / 1689 loss=4.535, nll_loss=2.951, ppl=7.73, wps=462165, ups=1.06, wpb=434479, bsz=16687.9, num_updates=6500, lr=0.000784465, gnorm=0.357, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=6281 +epoch 004: 1444 / 1689 loss=4.535, nll_loss=2.951, ppl=7.73, wps=462165, ups=1.06, wpb=434479, bsz=16687.9, num_updates=6500, lr=0.000784465, gnorm=0.357, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=6281 +epoch 004: 1444 / 1689 loss=4.535, nll_loss=2.951, ppl=7.73, wps=462165, ups=1.06, wpb=434479, bsz=16687.9, num_updates=6500, lr=0.000784465, gnorm=0.357, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=6281 +epoch 004: 1544 / 1689 loss=4.533, nll_loss=2.949, ppl=7.72, wps=461917, ups=1.06, wpb=434053, bsz=16285.8, num_updates=6600, lr=0.000778499, gnorm=0.342, clip=0, loss_scale=4, train_wall=92, gb_free=19.4, wall=6375 +epoch 004: 1544 / 1689 loss=4.533, nll_loss=2.949, ppl=7.72, wps=461917, ups=1.06, wpb=434053, bsz=16285.8, num_updates=6600, lr=0.000778499, gnorm=0.342, clip=0, loss_scale=4, train_wall=92, gb_free=19.4, wall=6375 +epoch 004: 1544 / 1689 loss=4.533, nll_loss=2.949, ppl=7.72, wps=461917, ups=1.06, wpb=434053, bsz=16285.8, num_updates=6600, lr=0.000778499, gnorm=0.342, clip=0, loss_scale=4, train_wall=92, gb_free=19.4, wall=6375 +epoch 004: 1544 / 1689 loss=4.533, nll_loss=2.949, ppl=7.72, wps=461917, ups=1.06, wpb=434053, bsz=16285.8, num_updates=6600, lr=0.000778499, gnorm=0.342, clip=0, loss_scale=4, train_wall=92, gb_free=19.4, wall=6375 +epoch 004: 1644 / 1689 loss=4.534, nll_loss=2.951, ppl=7.73, wps=463435, ups=1.07, wpb=432968, bsz=16215.5, num_updates=6700, lr=0.000772667, gnorm=0.363, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=6469 +epoch 004: 1644 / 1689 loss=4.534, nll_loss=2.951, ppl=7.73, wps=463435, ups=1.07, wpb=432968, bsz=16215.5, num_updates=6700, lr=0.000772667, gnorm=0.363, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=6469 +epoch 004: 1644 / 1689 loss=4.534, nll_loss=2.951, ppl=7.73, wps=463435, ups=1.07, wpb=432968, bsz=16215.5, num_updates=6700, lr=0.000772667, gnorm=0.363, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=6469 +epoch 004: 1644 / 1689 loss=4.534, nll_loss=2.951, ppl=7.73, wps=463435, ups=1.07, wpb=432968, bsz=16215.5, num_updates=6700, lr=0.000772667, gnorm=0.363, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=6469 +end of epoch 4 (average epoch stats below) +epoch 004 | loss 4.571 | nll_loss 2.989 | ppl 7.94 | wps 454714 | ups 1.05 | wpb 433537 | bsz 16508.1 | num_updates 6745 | lr 0.000770086 | gnorm 0.356 | clip 0 | loss_scale 4 | train_wall 1563 | gb_free 19.6 | wall 6510 +epoch 004 | loss 4.571 | nll_loss 2.989 | ppl 7.94 | wps 454714 | ups 1.05 | wpb 433537 | bsz 16508.1 | num_updates 6745 | lr 0.000770086 | gnorm 0.356 | clip 0 | loss_scale 4 | train_wall 1563 | gb_free 19.6 | wall 6510 +epoch 004 | loss 4.571 | nll_loss 2.989 | ppl 7.94 | wps 454714 | ups 1.05 | wpb 433537 | bsz 16508.1 | num_updates 6745 | lr 0.000770086 | gnorm 0.356 | clip 0 | loss_scale 4 | train_wall 1563 | gb_free 19.6 | wall 6510 +epoch 004 | loss 4.571 | nll_loss 2.989 | ppl 7.94 | wps 454714 | ups 1.05 | wpb 433537 | bsz 16508.1 | num_updates 6745 | lr 0.000770086 | gnorm 0.356 | clip 0 | loss_scale 4 | train_wall 1563 | gb_free 19.6 | wall 6510 +Start iterating over samples +epoch 005: 55 / 1689 loss=4.494, nll_loss=2.905, ppl=7.49, wps=460054, ups=1.07, wpb=429079, bsz=16276.2, num_updates=6800, lr=0.000766965, gnorm=0.336, clip=0, loss_scale=4, train_wall=91, gb_free=21.1, wall=6562 +epoch 005: 55 / 1689 loss=4.494, nll_loss=2.905, ppl=7.49, wps=460054, ups=1.07, wpb=429079, bsz=16276.2, num_updates=6800, lr=0.000766965, gnorm=0.336, clip=0, loss_scale=4, train_wall=91, gb_free=21.1, wall=6562 +epoch 005: 55 / 1689 loss=4.494, nll_loss=2.905, ppl=7.49, wps=460054, ups=1.07, wpb=429079, bsz=16276.2, num_updates=6800, lr=0.000766965, gnorm=0.336, clip=0, loss_scale=4, train_wall=91, gb_free=21.1, wall=6562 +epoch 005: 55 / 1689 loss=4.494, nll_loss=2.905, ppl=7.49, wps=460054, ups=1.07, wpb=429079, bsz=16276.2, num_updates=6800, lr=0.000766965, gnorm=0.336, clip=0, loss_scale=4, train_wall=91, gb_free=21.1, wall=6562 +epoch 005: 55 / 1689 loss=4.494, nll_loss=2.905, ppl=7.49, wps=460054, ups=1.07, wpb=429079, bsz=16276.2, num_updates=6800, lr=0.000766965, gnorm=0.336, clip=0, loss_scale=4, train_wall=91, gb_free=21.1, wall=6562 +epoch 005: 156 / 1689 loss=4.487, nll_loss=2.897, ppl=7.45, wps=458291, ups=1.06, wpb=434283, bsz=16357.2, num_updates=6900, lr=0.000761387, gnorm=0.35, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=6657 +epoch 005: 156 / 1689 loss=4.487, nll_loss=2.897, ppl=7.45, wps=458291, ups=1.06, wpb=434283, bsz=16357.2, num_updates=6900, lr=0.000761387, gnorm=0.35, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=6657 +epoch 005: 156 / 1689 loss=4.487, nll_loss=2.897, ppl=7.45, wps=458291, ups=1.06, wpb=434283, bsz=16357.2, num_updates=6900, lr=0.000761387, gnorm=0.35, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=6657 +epoch 005: 156 / 1689 loss=4.487, nll_loss=2.897, ppl=7.45, wps=458291, ups=1.06, wpb=434283, bsz=16357.2, num_updates=6900, lr=0.000761387, gnorm=0.35, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=6657 +epoch 005: 156 / 1689 loss=4.487, nll_loss=2.897, ppl=7.45, wps=458291, ups=1.06, wpb=434283, bsz=16357.2, num_updates=6900, lr=0.000761387, gnorm=0.35, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=6657 +epoch 005: 256 / 1689 loss=4.483, nll_loss=2.893, ppl=7.43, wps=460318, ups=1.06, wpb=432530, bsz=16314.4, num_updates=7000, lr=0.000755929, gnorm=0.338, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=6751 +epoch 005: 256 / 1689 loss=4.483, nll_loss=2.893, ppl=7.43, wps=460318, ups=1.06, wpb=432530, bsz=16314.4, num_updates=7000, lr=0.000755929, gnorm=0.338, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=6751 +epoch 005: 256 / 1689 loss=4.483, nll_loss=2.893, ppl=7.43, wps=460318, ups=1.06, wpb=432530, bsz=16314.4, num_updates=7000, lr=0.000755929, gnorm=0.338, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=6751 +epoch 005: 256 / 1689 loss=4.483, nll_loss=2.893, ppl=7.43, wps=460318, ups=1.06, wpb=432530, bsz=16314.4, num_updates=7000, lr=0.000755929, gnorm=0.338, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=6751 +epoch 005: 256 / 1689 loss=4.483, nll_loss=2.893, ppl=7.43, wps=460318, ups=1.06, wpb=432530, bsz=16314.4, num_updates=7000, lr=0.000755929, gnorm=0.338, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=6751 +begin validation on "valid" subset +epoch 005 | valid on 'valid' subset | loss 4.471 | nll_loss 2.837 | ppl 7.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 7000 | best_loss 4.471 +epoch 005 | valid on 'valid' subset | loss 4.471 | nll_loss 2.837 | ppl 7.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 7000 | best_loss 4.471 +epoch 005 | valid on 'valid' subset | loss 4.471 | nll_loss 2.837 | ppl 7.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 7000 | best_loss 4.471 +epoch 005 | valid on 'valid' subset | loss 4.471 | nll_loss 2.837 | ppl 7.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 7000 | best_loss 4.471 +epoch 005 | valid on 'valid' subset | loss 4.471 | nll_loss 2.837 | ppl 7.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 7000 | best_loss 4.471 +epoch 005: 356 / 1689 loss=4.475, nll_loss=2.884, ppl=7.38, wps=174499, ups=0.4, wpb=434470, bsz=16713.8, num_updates=7100, lr=0.000750587, gnorm=0.349, clip=0, loss_scale=2, train_wall=185, gb_free=21.5, wall=7000 +epoch 005: 356 / 1689 loss=4.475, nll_loss=2.884, ppl=7.38, wps=174499, ups=0.4, wpb=434470, bsz=16713.8, num_updates=7100, lr=0.000750587, gnorm=0.349, clip=0, loss_scale=2, train_wall=185, gb_free=21.5, wall=7000 +epoch 005: 356 / 1689 loss=4.475, nll_loss=2.884, ppl=7.38, wps=174499, ups=0.4, wpb=434470, bsz=16713.8, num_updates=7100, lr=0.000750587, gnorm=0.349, clip=0, loss_scale=2, train_wall=185, gb_free=21.5, wall=7000 +epoch 005: 356 / 1689 loss=4.475, nll_loss=2.884, ppl=7.38, wps=174499, ups=0.4, wpb=434470, bsz=16713.8, num_updates=7100, lr=0.000750587, gnorm=0.349, clip=0, loss_scale=2, train_wall=185, gb_free=21.5, wall=7000 +epoch 005: 356 / 1689 loss=4.475, nll_loss=2.884, ppl=7.38, wps=174499, ups=0.4, wpb=434470, bsz=16713.8, num_updates=7100, lr=0.000750587, gnorm=0.349, clip=0, loss_scale=2, train_wall=185, gb_free=21.5, wall=7000 +epoch 005: 456 / 1689 loss=4.485, nll_loss=2.896, ppl=7.44, wps=465587, ups=1.08, wpb=433003, bsz=16603, num_updates=7200, lr=0.000745356, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=18.2, wall=7093 +epoch 005: 456 / 1689 loss=4.485, nll_loss=2.896, ppl=7.44, wps=465587, ups=1.08, wpb=433003, bsz=16603, num_updates=7200, lr=0.000745356, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=18.2, wall=7093 +epoch 005: 456 / 1689 loss=4.485, nll_loss=2.896, ppl=7.44, wps=465587, ups=1.08, wpb=433003, bsz=16603, num_updates=7200, lr=0.000745356, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=18.2, wall=7093 +epoch 005: 456 / 1689 loss=4.485, nll_loss=2.896, ppl=7.44, wps=465587, ups=1.08, wpb=433003, bsz=16603, num_updates=7200, lr=0.000745356, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=18.2, wall=7093 +epoch 005: 456 / 1689 loss=4.485, nll_loss=2.896, ppl=7.44, wps=465587, ups=1.08, wpb=433003, bsz=16603, num_updates=7200, lr=0.000745356, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=18.2, wall=7093 +epoch 005: 556 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=464945, ups=1.07, wpb=434026, bsz=16606.7, num_updates=7300, lr=0.000740233, gnorm=0.333, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=7186 +epoch 005: 556 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=464945, ups=1.07, wpb=434026, bsz=16606.7, num_updates=7300, lr=0.000740233, gnorm=0.333, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=7186 +epoch 005: 556 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=464945, ups=1.07, wpb=434026, bsz=16606.7, num_updates=7300, lr=0.000740233, gnorm=0.333, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=7186 +epoch 005: 556 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=464945, ups=1.07, wpb=434026, bsz=16606.7, num_updates=7300, lr=0.000740233, gnorm=0.333, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=7186 +epoch 005: 556 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=464945, ups=1.07, wpb=434026, bsz=16606.7, num_updates=7300, lr=0.000740233, gnorm=0.333, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=7186 +epoch 005: 656 / 1689 loss=4.479, nll_loss=2.889, ppl=7.41, wps=463213, ups=1.07, wpb=433328, bsz=16463.8, num_updates=7400, lr=0.000735215, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=19.8, wall=7279 +epoch 005: 656 / 1689 loss=4.479, nll_loss=2.889, ppl=7.41, wps=463213, ups=1.07, wpb=433328, bsz=16463.8, num_updates=7400, lr=0.000735215, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=19.8, wall=7279 +epoch 005: 656 / 1689 loss=4.479, nll_loss=2.889, ppl=7.41, wps=463213, ups=1.07, wpb=433328, bsz=16463.8, num_updates=7400, lr=0.000735215, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=19.8, wall=7279 +epoch 005: 656 / 1689 loss=4.479, nll_loss=2.889, ppl=7.41, wps=463213, ups=1.07, wpb=433328, bsz=16463.8, num_updates=7400, lr=0.000735215, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=19.8, wall=7279 +epoch 005: 656 / 1689 loss=4.479, nll_loss=2.889, ppl=7.41, wps=463213, ups=1.07, wpb=433328, bsz=16463.8, num_updates=7400, lr=0.000735215, gnorm=0.336, clip=0, loss_scale=2, train_wall=93, gb_free=19.8, wall=7279 +epoch 005: 756 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=460993, ups=1.07, wpb=431285, bsz=16406.2, num_updates=7500, lr=0.000730297, gnorm=0.326, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=7373 +epoch 005: 756 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=460993, ups=1.07, wpb=431285, bsz=16406.2, num_updates=7500, lr=0.000730297, gnorm=0.326, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=7373 +epoch 005: 756 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=460993, ups=1.07, wpb=431285, bsz=16406.2, num_updates=7500, lr=0.000730297, gnorm=0.326, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=7373 +epoch 005: 756 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=460993, ups=1.07, wpb=431285, bsz=16406.2, num_updates=7500, lr=0.000730297, gnorm=0.326, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=7373 +epoch 005: 756 / 1689 loss=4.471, nll_loss=2.881, ppl=7.37, wps=460993, ups=1.07, wpb=431285, bsz=16406.2, num_updates=7500, lr=0.000730297, gnorm=0.326, clip=0, loss_scale=4, train_wall=93, gb_free=19.7, wall=7373 +epoch 005: 857 / 1689 loss=4.464, nll_loss=2.874, ppl=7.33, wps=453941, ups=1.04, wpb=435224, bsz=16500.9, num_updates=7600, lr=0.000725476, gnorm=0.342, clip=0, loss_scale=2, train_wall=95, gb_free=18.1, wall=7469 +epoch 005: 857 / 1689 loss=4.464, nll_loss=2.874, ppl=7.33, wps=453941, ups=1.04, wpb=435224, bsz=16500.9, num_updates=7600, lr=0.000725476, gnorm=0.342, clip=0, loss_scale=2, train_wall=95, gb_free=18.1, wall=7469 +epoch 005: 857 / 1689 loss=4.464, nll_loss=2.874, ppl=7.33, wps=453941, ups=1.04, wpb=435224, bsz=16500.9, num_updates=7600, lr=0.000725476, gnorm=0.342, clip=0, loss_scale=2, train_wall=95, gb_free=18.1, wall=7469 +epoch 005: 857 / 1689 loss=4.464, nll_loss=2.874, ppl=7.33, wps=453941, ups=1.04, wpb=435224, bsz=16500.9, num_updates=7600, lr=0.000725476, gnorm=0.342, clip=0, loss_scale=2, train_wall=95, gb_free=18.1, wall=7469 +epoch 005: 857 / 1689 loss=4.464, nll_loss=2.874, ppl=7.33, wps=453941, ups=1.04, wpb=435224, bsz=16500.9, num_updates=7600, lr=0.000725476, gnorm=0.342, clip=0, loss_scale=2, train_wall=95, gb_free=18.1, wall=7469 +epoch 005: 957 / 1689 loss=4.47, nll_loss=2.88, ppl=7.36, wps=463554, ups=1.06, wpb=435414, bsz=16477.5, num_updates=7700, lr=0.00072075, gnorm=0.344, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=7563 +epoch 005: 957 / 1689 loss=4.47, nll_loss=2.88, ppl=7.36, wps=463554, ups=1.06, wpb=435414, bsz=16477.5, num_updates=7700, lr=0.00072075, gnorm=0.344, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=7563 +epoch 005: 957 / 1689 loss=4.47, nll_loss=2.88, ppl=7.36, wps=463554, ups=1.06, wpb=435414, bsz=16477.5, num_updates=7700, lr=0.00072075, gnorm=0.344, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=7563 +epoch 005: 957 / 1689 loss=4.47, nll_loss=2.88, ppl=7.36, wps=463554, ups=1.06, wpb=435414, bsz=16477.5, num_updates=7700, lr=0.00072075, gnorm=0.344, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=7563 +epoch 005: 957 / 1689 loss=4.47, nll_loss=2.88, ppl=7.36, wps=463554, ups=1.06, wpb=435414, bsz=16477.5, num_updates=7700, lr=0.00072075, gnorm=0.344, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=7563 +epoch 005: 1057 / 1689 loss=4.451, nll_loss=2.86, ppl=7.26, wps=465478, ups=1.07, wpb=434233, bsz=16651.7, num_updates=7800, lr=0.000716115, gnorm=0.33, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=7656 +epoch 005: 1057 / 1689 loss=4.451, nll_loss=2.86, ppl=7.26, wps=465478, ups=1.07, wpb=434233, bsz=16651.7, num_updates=7800, lr=0.000716115, gnorm=0.33, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=7656 +epoch 005: 1057 / 1689 loss=4.451, nll_loss=2.86, ppl=7.26, wps=465478, ups=1.07, wpb=434233, bsz=16651.7, num_updates=7800, lr=0.000716115, gnorm=0.33, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=7656 +epoch 005: 1057 / 1689 loss=4.451, nll_loss=2.86, ppl=7.26, wps=465478, ups=1.07, wpb=434233, bsz=16651.7, num_updates=7800, lr=0.000716115, gnorm=0.33, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=7656 +epoch 005: 1057 / 1689 loss=4.451, nll_loss=2.86, ppl=7.26, wps=465478, ups=1.07, wpb=434233, bsz=16651.7, num_updates=7800, lr=0.000716115, gnorm=0.33, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=7656 +epoch 005: 1157 / 1689 loss=4.454, nll_loss=2.863, ppl=7.27, wps=466287, ups=1.07, wpb=435090, bsz=16662.6, num_updates=7900, lr=0.000711568, gnorm=0.329, clip=0, loss_scale=2, train_wall=92, gb_free=18.3, wall=7749 +epoch 005: 1157 / 1689 loss=4.454, nll_loss=2.863, ppl=7.27, wps=466287, ups=1.07, wpb=435090, bsz=16662.6, num_updates=7900, lr=0.000711568, gnorm=0.329, clip=0, loss_scale=2, train_wall=92, gb_free=18.3, wall=7749 +epoch 005: 1157 / 1689 loss=4.454, nll_loss=2.863, ppl=7.27, wps=466287, ups=1.07, wpb=435090, bsz=16662.6, num_updates=7900, lr=0.000711568, gnorm=0.329, clip=0, loss_scale=2, train_wall=92, gb_free=18.3, wall=7749 +epoch 005: 1157 / 1689 loss=4.454, nll_loss=2.863, ppl=7.27, wps=466287, ups=1.07, wpb=435090, bsz=16662.6, num_updates=7900, lr=0.000711568, gnorm=0.329, clip=0, loss_scale=2, train_wall=92, gb_free=18.3, wall=7749 +epoch 005: 1157 / 1689 loss=4.454, nll_loss=2.863, ppl=7.27, wps=466287, ups=1.07, wpb=435090, bsz=16662.6, num_updates=7900, lr=0.000711568, gnorm=0.329, clip=0, loss_scale=2, train_wall=92, gb_free=18.3, wall=7749 +epoch 005: 1257 / 1689 loss=4.456, nll_loss=2.866, ppl=7.29, wps=466863, ups=1.08, wpb=434275, bsz=16374.6, num_updates=8000, lr=0.000707107, gnorm=0.336, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=7842 +epoch 005: 1257 / 1689 loss=4.456, nll_loss=2.866, ppl=7.29, wps=466863, ups=1.08, wpb=434275, bsz=16374.6, num_updates=8000, lr=0.000707107, gnorm=0.336, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=7842 +epoch 005: 1257 / 1689 loss=4.456, nll_loss=2.866, ppl=7.29, wps=466863, ups=1.08, wpb=434275, bsz=16374.6, num_updates=8000, lr=0.000707107, gnorm=0.336, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=7842 +epoch 005: 1257 / 1689 loss=4.456, nll_loss=2.866, ppl=7.29, wps=466863, ups=1.08, wpb=434275, bsz=16374.6, num_updates=8000, lr=0.000707107, gnorm=0.336, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=7842 +epoch 005: 1257 / 1689 loss=4.456, nll_loss=2.866, ppl=7.29, wps=466863, ups=1.08, wpb=434275, bsz=16374.6, num_updates=8000, lr=0.000707107, gnorm=0.336, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=7842 +begin validation on "valid" subset +epoch 005 | valid on 'valid' subset | loss 4.425 | nll_loss 2.793 | ppl 6.93 | wps 0 | wpb 42662 | bsz 2032 | num_updates 8000 | best_loss 4.425 +epoch 005 | valid on 'valid' subset | loss 4.425 | nll_loss 2.793 | ppl 6.93 | wps 0 | wpb 42662 | bsz 2032 | num_updates 8000 | best_loss 4.425 +epoch 005 | valid on 'valid' subset | loss 4.425 | nll_loss 2.793 | ppl 6.93 | wps 0 | wpb 42662 | bsz 2032 | num_updates 8000 | best_loss 4.425 +epoch 005 | valid on 'valid' subset | loss 4.425 | nll_loss 2.793 | ppl 6.93 | wps 0 | wpb 42662 | bsz 2032 | num_updates 8000 | best_loss 4.425 +epoch 005 | valid on 'valid' subset | loss 4.425 | nll_loss 2.793 | ppl 6.93 | wps 0 | wpb 42662 | bsz 2032 | num_updates 8000 | best_loss 4.425 +epoch 005: 1357 / 1689 loss=4.453, nll_loss=2.862, ppl=7.27, wps=388012, ups=0.89, wpb=434803, bsz=16743.4, num_updates=8100, lr=0.000702728, gnorm=0.33, clip=0, loss_scale=4, train_wall=92, gb_free=18.2, wall=7955 +epoch 005: 1357 / 1689 loss=4.453, nll_loss=2.862, ppl=7.27, wps=388012, ups=0.89, wpb=434803, bsz=16743.4, num_updates=8100, lr=0.000702728, gnorm=0.33, clip=0, loss_scale=4, train_wall=92, gb_free=18.2, wall=7955 +epoch 005: 1357 / 1689 loss=4.453, nll_loss=2.862, ppl=7.27, wps=388012, ups=0.89, wpb=434803, bsz=16743.4, num_updates=8100, lr=0.000702728, gnorm=0.33, clip=0, loss_scale=4, train_wall=92, gb_free=18.2, wall=7955 +epoch 005: 1357 / 1689 loss=4.453, nll_loss=2.862, ppl=7.27, wps=388012, ups=0.89, wpb=434803, bsz=16743.4, num_updates=8100, lr=0.000702728, gnorm=0.33, clip=0, loss_scale=4, train_wall=92, gb_free=18.2, wall=7955 +epoch 005: 1357 / 1689 loss=4.453, nll_loss=2.862, ppl=7.27, wps=388012, ups=0.89, wpb=434803, bsz=16743.4, num_updates=8100, lr=0.000702728, gnorm=0.33, clip=0, loss_scale=4, train_wall=92, gb_free=18.2, wall=7955 +epoch 005: 1457 / 1689 loss=4.446, nll_loss=2.854, ppl=7.23, wps=466157, ups=1.08, wpb=433309, bsz=16459.7, num_updates=8200, lr=0.00069843, gnorm=0.329, clip=0, loss_scale=4, train_wall=92, gb_free=19.8, wall=8047 +epoch 005: 1457 / 1689 loss=4.446, nll_loss=2.854, ppl=7.23, wps=466157, ups=1.08, wpb=433309, bsz=16459.7, num_updates=8200, lr=0.00069843, gnorm=0.329, clip=0, loss_scale=4, train_wall=92, gb_free=19.8, wall=8047 +epoch 005: 1457 / 1689 loss=4.446, nll_loss=2.854, ppl=7.23, wps=466157, ups=1.08, wpb=433309, bsz=16459.7, num_updates=8200, lr=0.00069843, gnorm=0.329, clip=0, loss_scale=4, train_wall=92, gb_free=19.8, wall=8047 +epoch 005: 1457 / 1689 loss=4.446, nll_loss=2.854, ppl=7.23, wps=466157, ups=1.08, wpb=433309, bsz=16459.7, num_updates=8200, lr=0.00069843, gnorm=0.329, clip=0, loss_scale=4, train_wall=92, gb_free=19.8, wall=8047 +epoch 005: 1457 / 1689 loss=4.446, nll_loss=2.854, ppl=7.23, wps=466157, ups=1.08, wpb=433309, bsz=16459.7, num_updates=8200, lr=0.00069843, gnorm=0.329, clip=0, loss_scale=4, train_wall=92, gb_free=19.8, wall=8047 +epoch 005: 1558 / 1689 loss=4.439, nll_loss=2.847, ppl=7.2, wps=460759, ups=1.07, wpb=431946, bsz=16191.4, num_updates=8300, lr=0.00069421, gnorm=0.333, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=8141 +epoch 005: 1558 / 1689 loss=4.439, nll_loss=2.847, ppl=7.2, wps=460759, ups=1.07, wpb=431946, bsz=16191.4, num_updates=8300, lr=0.00069421, gnorm=0.333, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=8141 +epoch 005: 1558 / 1689 loss=4.439, nll_loss=2.847, ppl=7.2, wps=460759, ups=1.07, wpb=431946, bsz=16191.4, num_updates=8300, lr=0.00069421, gnorm=0.333, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=8141 +epoch 005: 1558 / 1689 loss=4.439, nll_loss=2.847, ppl=7.2, wps=460759, ups=1.07, wpb=431946, bsz=16191.4, num_updates=8300, lr=0.00069421, gnorm=0.333, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=8141 +epoch 005: 1558 / 1689 loss=4.439, nll_loss=2.847, ppl=7.2, wps=460759, ups=1.07, wpb=431946, bsz=16191.4, num_updates=8300, lr=0.00069421, gnorm=0.333, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=8141 +epoch 005: 1659 / 1689 loss=4.446, nll_loss=2.856, ppl=7.24, wps=457222, ups=1.06, wpb=432697, bsz=16910.9, num_updates=8400, lr=0.000690066, gnorm=0.333, clip=0, loss_scale=1, train_wall=94, gb_free=19.8, wall=8236 +epoch 005: 1659 / 1689 loss=4.446, nll_loss=2.856, ppl=7.24, wps=457222, ups=1.06, wpb=432697, bsz=16910.9, num_updates=8400, lr=0.000690066, gnorm=0.333, clip=0, loss_scale=1, train_wall=94, gb_free=19.8, wall=8236 +epoch 005: 1659 / 1689 loss=4.446, nll_loss=2.856, ppl=7.24, wps=457222, ups=1.06, wpb=432697, bsz=16910.9, num_updates=8400, lr=0.000690066, gnorm=0.333, clip=0, loss_scale=1, train_wall=94, gb_free=19.8, wall=8236 +epoch 005: 1659 / 1689 loss=4.446, nll_loss=2.856, ppl=7.24, wps=457222, ups=1.06, wpb=432697, bsz=16910.9, num_updates=8400, lr=0.000690066, gnorm=0.333, clip=0, loss_scale=1, train_wall=94, gb_free=19.8, wall=8236 +epoch 005: 1659 / 1689 loss=4.446, nll_loss=2.856, ppl=7.24, wps=457222, ups=1.06, wpb=432697, bsz=16910.9, num_updates=8400, lr=0.000690066, gnorm=0.333, clip=0, loss_scale=1, train_wall=94, gb_free=19.8, wall=8236 +end of epoch 5 (average epoch stats below) +epoch 005 | loss 4.465 | nll_loss 2.874 | ppl 7.33 | wps 416624 | ups 0.96 | wpb 433514 | bsz 16507.9 | num_updates 8430 | lr 0.000688837 | gnorm 0.336 | clip 0 | loss_scale 1 | train_wall 1652 | gb_free 23.9 | wall 8263 +epoch 005 | loss 4.465 | nll_loss 2.874 | ppl 7.33 | wps 416624 | ups 0.96 | wpb 433514 | bsz 16507.9 | num_updates 8430 | lr 0.000688837 | gnorm 0.336 | clip 0 | loss_scale 1 | train_wall 1652 | gb_free 23.9 | wall 8263 +epoch 005 | loss 4.465 | nll_loss 2.874 | ppl 7.33 | wps 416624 | ups 0.96 | wpb 433514 | bsz 16507.9 | num_updates 8430 | lr 0.000688837 | gnorm 0.336 | clip 0 | loss_scale 1 | train_wall 1652 | gb_free 23.9 | wall 8263 +epoch 005 | loss 4.465 | nll_loss 2.874 | ppl 7.33 | wps 416624 | ups 0.96 | wpb 433514 | bsz 16507.9 | num_updates 8430 | lr 0.000688837 | gnorm 0.336 | clip 0 | loss_scale 1 | train_wall 1652 | gb_free 23.9 | wall 8263 +epoch 005 | loss 4.465 | nll_loss 2.874 | ppl 7.33 | wps 416624 | ups 0.96 | wpb 433514 | bsz 16507.9 | num_updates 8430 | lr 0.000688837 | gnorm 0.336 | clip 0 | loss_scale 1 | train_wall 1652 | gb_free 23.9 | wall 8263 +Start iterating over samples +epoch 006: 70 / 1689 loss=4.406, nll_loss=2.808, ppl=7, wps=461295, ups=1.07, wpb=430353, bsz=16080.6, num_updates=8500, lr=0.000685994, gnorm=0.332, clip=0, loss_scale=1, train_wall=91, gb_free=19.5, wall=8329 +epoch 006: 70 / 1689 loss=4.406, nll_loss=2.808, ppl=7, wps=461295, ups=1.07, wpb=430353, bsz=16080.6, num_updates=8500, lr=0.000685994, gnorm=0.332, clip=0, loss_scale=1, train_wall=91, gb_free=19.5, wall=8329 +epoch 006: 70 / 1689 loss=4.406, nll_loss=2.808, ppl=7, wps=461295, ups=1.07, wpb=430353, bsz=16080.6, num_updates=8500, lr=0.000685994, gnorm=0.332, clip=0, loss_scale=1, train_wall=91, gb_free=19.5, wall=8329 +epoch 006: 70 / 1689 loss=4.406, nll_loss=2.808, ppl=7, wps=461295, ups=1.07, wpb=430353, bsz=16080.6, num_updates=8500, lr=0.000685994, gnorm=0.332, clip=0, loss_scale=1, train_wall=91, gb_free=19.5, wall=8329 +epoch 006: 70 / 1689 loss=4.406, nll_loss=2.808, ppl=7, wps=461295, ups=1.07, wpb=430353, bsz=16080.6, num_updates=8500, lr=0.000685994, gnorm=0.332, clip=0, loss_scale=1, train_wall=91, gb_free=19.5, wall=8329 +epoch 006: 70 / 1689 loss=4.406, nll_loss=2.808, ppl=7, wps=461295, ups=1.07, wpb=430353, bsz=16080.6, num_updates=8500, lr=0.000685994, gnorm=0.332, clip=0, loss_scale=1, train_wall=91, gb_free=19.5, wall=8329 +epoch 006: 171 / 1689 loss=4.411, nll_loss=2.814, ppl=7.03, wps=463060, ups=1.07, wpb=434536, bsz=16325, num_updates=8600, lr=0.000681994, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=8423 +epoch 006: 171 / 1689 loss=4.411, nll_loss=2.814, ppl=7.03, wps=463060, ups=1.07, wpb=434536, bsz=16325, num_updates=8600, lr=0.000681994, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=8423 +epoch 006: 171 / 1689 loss=4.411, nll_loss=2.814, ppl=7.03, wps=463060, ups=1.07, wpb=434536, bsz=16325, num_updates=8600, lr=0.000681994, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=8423 +epoch 006: 171 / 1689 loss=4.411, nll_loss=2.814, ppl=7.03, wps=463060, ups=1.07, wpb=434536, bsz=16325, num_updates=8600, lr=0.000681994, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=8423 +epoch 006: 171 / 1689 loss=4.411, nll_loss=2.814, ppl=7.03, wps=463060, ups=1.07, wpb=434536, bsz=16325, num_updates=8600, lr=0.000681994, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=8423 +epoch 006: 171 / 1689 loss=4.411, nll_loss=2.814, ppl=7.03, wps=463060, ups=1.07, wpb=434536, bsz=16325, num_updates=8600, lr=0.000681994, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=8423 +epoch 006: 271 / 1689 loss=4.396, nll_loss=2.798, ppl=6.96, wps=459064, ups=1.06, wpb=433091, bsz=16592.1, num_updates=8700, lr=0.000678064, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.3, wall=8517 +epoch 006: 271 / 1689 loss=4.396, nll_loss=2.798, ppl=6.96, wps=459064, ups=1.06, wpb=433091, bsz=16592.1, num_updates=8700, lr=0.000678064, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.3, wall=8517 +epoch 006: 271 / 1689 loss=4.396, nll_loss=2.798, ppl=6.96, wps=459064, ups=1.06, wpb=433091, bsz=16592.1, num_updates=8700, lr=0.000678064, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.3, wall=8517 +epoch 006: 271 / 1689 loss=4.396, nll_loss=2.798, ppl=6.96, wps=459064, ups=1.06, wpb=433091, bsz=16592.1, num_updates=8700, lr=0.000678064, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.3, wall=8517 +epoch 006: 271 / 1689 loss=4.396, nll_loss=2.798, ppl=6.96, wps=459064, ups=1.06, wpb=433091, bsz=16592.1, num_updates=8700, lr=0.000678064, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.3, wall=8517 +epoch 006: 271 / 1689 loss=4.396, nll_loss=2.798, ppl=6.96, wps=459064, ups=1.06, wpb=433091, bsz=16592.1, num_updates=8700, lr=0.000678064, gnorm=0.324, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.3, wall=8517 +epoch 006: 371 / 1689 loss=4.409, nll_loss=2.813, ppl=7.03, wps=462059, ups=1.07, wpb=433504, bsz=16379.4, num_updates=8800, lr=0.0006742, gnorm=0.322, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=8611 +epoch 006: 371 / 1689 loss=4.409, nll_loss=2.813, ppl=7.03, wps=462059, ups=1.07, wpb=433504, bsz=16379.4, num_updates=8800, lr=0.0006742, gnorm=0.322, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=8611 +epoch 006: 371 / 1689 loss=4.409, nll_loss=2.813, ppl=7.03, wps=462059, ups=1.07, wpb=433504, bsz=16379.4, num_updates=8800, lr=0.0006742, gnorm=0.322, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=8611 +epoch 006: 371 / 1689 loss=4.409, nll_loss=2.813, ppl=7.03, wps=462059, ups=1.07, wpb=433504, bsz=16379.4, num_updates=8800, lr=0.0006742, gnorm=0.322, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=8611 +epoch 006: 371 / 1689 loss=4.409, nll_loss=2.813, ppl=7.03, wps=462059, ups=1.07, wpb=433504, bsz=16379.4, num_updates=8800, lr=0.0006742, gnorm=0.322, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=8611 +epoch 006: 371 / 1689 loss=4.409, nll_loss=2.813, ppl=7.03, wps=462059, ups=1.07, wpb=433504, bsz=16379.4, num_updates=8800, lr=0.0006742, gnorm=0.322, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=8611 +epoch 006: 471 / 1689 loss=4.403, nll_loss=2.806, ppl=6.99, wps=458950, ups=1.06, wpb=432513, bsz=16384.4, num_updates=8900, lr=0.000670402, gnorm=0.328, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=8705 +epoch 006: 471 / 1689 loss=4.403, nll_loss=2.806, ppl=6.99, wps=458950, ups=1.06, wpb=432513, bsz=16384.4, num_updates=8900, lr=0.000670402, gnorm=0.328, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=8705 +epoch 006: 471 / 1689 loss=4.403, nll_loss=2.806, ppl=6.99, wps=458950, ups=1.06, wpb=432513, bsz=16384.4, num_updates=8900, lr=0.000670402, gnorm=0.328, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=8705 +epoch 006: 471 / 1689 loss=4.403, nll_loss=2.806, ppl=6.99, wps=458950, ups=1.06, wpb=432513, bsz=16384.4, num_updates=8900, lr=0.000670402, gnorm=0.328, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=8705 +epoch 006: 471 / 1689 loss=4.403, nll_loss=2.806, ppl=6.99, wps=458950, ups=1.06, wpb=432513, bsz=16384.4, num_updates=8900, lr=0.000670402, gnorm=0.328, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=8705 +epoch 006: 471 / 1689 loss=4.403, nll_loss=2.806, ppl=6.99, wps=458950, ups=1.06, wpb=432513, bsz=16384.4, num_updates=8900, lr=0.000670402, gnorm=0.328, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=8705 +epoch 006: 571 / 1689 loss=4.399, nll_loss=2.803, ppl=6.98, wps=460834, ups=1.06, wpb=434779, bsz=16699.4, num_updates=9000, lr=0.000666667, gnorm=0.33, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=8800 +epoch 006: 571 / 1689 loss=4.399, nll_loss=2.803, ppl=6.98, wps=460834, ups=1.06, wpb=434779, bsz=16699.4, num_updates=9000, lr=0.000666667, gnorm=0.33, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=8800 +epoch 006: 571 / 1689 loss=4.399, nll_loss=2.803, ppl=6.98, wps=460834, ups=1.06, wpb=434779, bsz=16699.4, num_updates=9000, lr=0.000666667, gnorm=0.33, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=8800 +epoch 006: 571 / 1689 loss=4.399, nll_loss=2.803, ppl=6.98, wps=460834, ups=1.06, wpb=434779, bsz=16699.4, num_updates=9000, lr=0.000666667, gnorm=0.33, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=8800 +epoch 006: 571 / 1689 loss=4.399, nll_loss=2.803, ppl=6.98, wps=460834, ups=1.06, wpb=434779, bsz=16699.4, num_updates=9000, lr=0.000666667, gnorm=0.33, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=8800 +epoch 006: 571 / 1689 loss=4.399, nll_loss=2.803, ppl=6.98, wps=460834, ups=1.06, wpb=434779, bsz=16699.4, num_updates=9000, lr=0.000666667, gnorm=0.33, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=8800 +begin validation on "valid" subset +epoch 006 | valid on 'valid' subset | loss 4.445 | nll_loss 2.808 | ppl 7.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 9000 | best_loss 4.425 +epoch 006 | valid on 'valid' subset | loss 4.445 | nll_loss 2.808 | ppl 7.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 9000 | best_loss 4.425 +epoch 006 | valid on 'valid' subset | loss 4.445 | nll_loss 2.808 | ppl 7.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 9000 | best_loss 4.425 +epoch 006 | valid on 'valid' subset | loss 4.445 | nll_loss 2.808 | ppl 7.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 9000 | best_loss 4.425 +epoch 006 | valid on 'valid' subset | loss 4.445 | nll_loss 2.808 | ppl 7.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 9000 | best_loss 4.425 +epoch 006 | valid on 'valid' subset | loss 4.445 | nll_loss 2.808 | ppl 7.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 9000 | best_loss 4.425 +epoch 006: 671 / 1689 loss=4.401, nll_loss=2.805, ppl=6.99, wps=358363, ups=0.83, wpb=432301, bsz=16807, num_updates=9100, lr=0.000662994, gnorm=0.311, clip=0, loss_scale=1, train_wall=99, gb_free=19.8, wall=8920 +epoch 006: 671 / 1689 loss=4.401, nll_loss=2.805, ppl=6.99, wps=358363, ups=0.83, wpb=432301, bsz=16807, num_updates=9100, lr=0.000662994, gnorm=0.311, clip=0, loss_scale=1, train_wall=99, gb_free=19.8, wall=8920 +epoch 006: 671 / 1689 loss=4.401, nll_loss=2.805, ppl=6.99, wps=358363, ups=0.83, wpb=432301, bsz=16807, num_updates=9100, lr=0.000662994, gnorm=0.311, clip=0, loss_scale=1, train_wall=99, gb_free=19.8, wall=8920 +epoch 006: 671 / 1689 loss=4.401, nll_loss=2.805, ppl=6.99, wps=358363, ups=0.83, wpb=432301, bsz=16807, num_updates=9100, lr=0.000662994, gnorm=0.311, clip=0, loss_scale=1, train_wall=99, gb_free=19.8, wall=8920 +epoch 006: 671 / 1689 loss=4.401, nll_loss=2.805, ppl=6.99, wps=358363, ups=0.83, wpb=432301, bsz=16807, num_updates=9100, lr=0.000662994, gnorm=0.311, clip=0, loss_scale=1, train_wall=99, gb_free=19.8, wall=8920 +epoch 006: 671 / 1689 loss=4.401, nll_loss=2.805, ppl=6.99, wps=358363, ups=0.83, wpb=432301, bsz=16807, num_updates=9100, lr=0.000662994, gnorm=0.311, clip=0, loss_scale=1, train_wall=99, gb_free=19.8, wall=8920 +epoch 006: 771 / 1689 loss=4.397, nll_loss=2.8, ppl=6.96, wps=460526, ups=1.06, wpb=433356, bsz=16388.8, num_updates=9200, lr=0.00065938, gnorm=0.328, clip=0, loss_scale=1, train_wall=93, gb_free=20.7, wall=9015 +epoch 006: 771 / 1689 loss=4.397, nll_loss=2.8, ppl=6.96, wps=460526, ups=1.06, wpb=433356, bsz=16388.8, num_updates=9200, lr=0.00065938, gnorm=0.328, clip=0, loss_scale=1, train_wall=93, gb_free=20.7, wall=9015 +epoch 006: 771 / 1689 loss=4.397, nll_loss=2.8, ppl=6.96, wps=460526, ups=1.06, wpb=433356, bsz=16388.8, num_updates=9200, lr=0.00065938, gnorm=0.328, clip=0, loss_scale=1, train_wall=93, gb_free=20.7, wall=9015 +epoch 006: 771 / 1689 loss=4.397, nll_loss=2.8, ppl=6.96, wps=460526, ups=1.06, wpb=433356, bsz=16388.8, num_updates=9200, lr=0.00065938, gnorm=0.328, clip=0, loss_scale=1, train_wall=93, gb_free=20.7, wall=9015 +epoch 006: 771 / 1689 loss=4.397, nll_loss=2.8, ppl=6.96, wps=460526, ups=1.06, wpb=433356, bsz=16388.8, num_updates=9200, lr=0.00065938, gnorm=0.328, clip=0, loss_scale=1, train_wall=93, gb_free=20.7, wall=9015 +epoch 006: 771 / 1689 loss=4.397, nll_loss=2.8, ppl=6.96, wps=460526, ups=1.06, wpb=433356, bsz=16388.8, num_updates=9200, lr=0.00065938, gnorm=0.328, clip=0, loss_scale=1, train_wall=93, gb_free=20.7, wall=9015 +epoch 006: 871 / 1689 loss=4.411, nll_loss=2.817, ppl=7.05, wps=458105, ups=1.06, wpb=433893, bsz=16833.3, num_updates=9300, lr=0.000655826, gnorm=0.316, clip=0, loss_scale=1, train_wall=93, gb_free=20.5, wall=9109 +epoch 006: 871 / 1689 loss=4.411, nll_loss=2.817, ppl=7.05, wps=458105, ups=1.06, wpb=433893, bsz=16833.3, num_updates=9300, lr=0.000655826, gnorm=0.316, clip=0, loss_scale=1, train_wall=93, gb_free=20.5, wall=9109 +epoch 006: 871 / 1689 loss=4.411, nll_loss=2.817, ppl=7.05, wps=458105, ups=1.06, wpb=433893, bsz=16833.3, num_updates=9300, lr=0.000655826, gnorm=0.316, clip=0, loss_scale=1, train_wall=93, gb_free=20.5, wall=9109 +epoch 006: 871 / 1689 loss=4.411, nll_loss=2.817, ppl=7.05, wps=458105, ups=1.06, wpb=433893, bsz=16833.3, num_updates=9300, lr=0.000655826, gnorm=0.316, clip=0, loss_scale=1, train_wall=93, gb_free=20.5, wall=9109 +epoch 006: 871 / 1689 loss=4.411, nll_loss=2.817, ppl=7.05, wps=458105, ups=1.06, wpb=433893, bsz=16833.3, num_updates=9300, lr=0.000655826, gnorm=0.316, clip=0, loss_scale=1, train_wall=93, gb_free=20.5, wall=9109 +epoch 006: 871 / 1689 loss=4.411, nll_loss=2.817, ppl=7.05, wps=458105, ups=1.06, wpb=433893, bsz=16833.3, num_updates=9300, lr=0.000655826, gnorm=0.316, clip=0, loss_scale=1, train_wall=93, gb_free=20.5, wall=9109 +epoch 006: 971 / 1689 loss=4.4, nll_loss=2.804, ppl=6.98, wps=460101, ups=1.06, wpb=434361, bsz=16305.7, num_updates=9400, lr=0.000652328, gnorm=0.306, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=9204 +epoch 006: 971 / 1689 loss=4.4, nll_loss=2.804, ppl=6.98, wps=460101, ups=1.06, wpb=434361, bsz=16305.7, num_updates=9400, lr=0.000652328, gnorm=0.306, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=9204 +epoch 006: 971 / 1689 loss=4.4, nll_loss=2.804, ppl=6.98, wps=460101, ups=1.06, wpb=434361, bsz=16305.7, num_updates=9400, lr=0.000652328, gnorm=0.306, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=9204 +epoch 006: 971 / 1689 loss=4.4, nll_loss=2.804, ppl=6.98, wps=460101, ups=1.06, wpb=434361, bsz=16305.7, num_updates=9400, lr=0.000652328, gnorm=0.306, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=9204 +epoch 006: 971 / 1689 loss=4.4, nll_loss=2.804, ppl=6.98, wps=460101, ups=1.06, wpb=434361, bsz=16305.7, num_updates=9400, lr=0.000652328, gnorm=0.306, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=9204 +epoch 006: 971 / 1689 loss=4.4, nll_loss=2.804, ppl=6.98, wps=460101, ups=1.06, wpb=434361, bsz=16305.7, num_updates=9400, lr=0.000652328, gnorm=0.306, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=9204 +epoch 006: 1071 / 1689 loss=4.394, nll_loss=2.798, ppl=6.95, wps=461499, ups=1.07, wpb=432636, bsz=16451.7, num_updates=9500, lr=0.000648886, gnorm=0.323, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=9297 +epoch 006: 1071 / 1689 loss=4.394, nll_loss=2.798, ppl=6.95, wps=461499, ups=1.07, wpb=432636, bsz=16451.7, num_updates=9500, lr=0.000648886, gnorm=0.323, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=9297 +epoch 006: 1071 / 1689 loss=4.394, nll_loss=2.798, ppl=6.95, wps=461499, ups=1.07, wpb=432636, bsz=16451.7, num_updates=9500, lr=0.000648886, gnorm=0.323, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=9297 +epoch 006: 1071 / 1689 loss=4.394, nll_loss=2.798, ppl=6.95, wps=461499, ups=1.07, wpb=432636, bsz=16451.7, num_updates=9500, lr=0.000648886, gnorm=0.323, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=9297 +epoch 006: 1071 / 1689 loss=4.394, nll_loss=2.798, ppl=6.95, wps=461499, ups=1.07, wpb=432636, bsz=16451.7, num_updates=9500, lr=0.000648886, gnorm=0.323, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=9297 +epoch 006: 1071 / 1689 loss=4.394, nll_loss=2.798, ppl=6.95, wps=461499, ups=1.07, wpb=432636, bsz=16451.7, num_updates=9500, lr=0.000648886, gnorm=0.323, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=9297 +epoch 006: 1171 / 1689 loss=4.395, nll_loss=2.799, ppl=6.96, wps=459606, ups=1.06, wpb=434620, bsz=16664.2, num_updates=9600, lr=0.000645497, gnorm=0.317, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=9392 +epoch 006: 1171 / 1689 loss=4.395, nll_loss=2.799, ppl=6.96, wps=459606, ups=1.06, wpb=434620, bsz=16664.2, num_updates=9600, lr=0.000645497, gnorm=0.317, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=9392 +epoch 006: 1171 / 1689 loss=4.395, nll_loss=2.799, ppl=6.96, wps=459606, ups=1.06, wpb=434620, bsz=16664.2, num_updates=9600, lr=0.000645497, gnorm=0.317, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=9392 +epoch 006: 1171 / 1689 loss=4.395, nll_loss=2.799, ppl=6.96, wps=459606, ups=1.06, wpb=434620, bsz=16664.2, num_updates=9600, lr=0.000645497, gnorm=0.317, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=9392 +epoch 006: 1171 / 1689 loss=4.395, nll_loss=2.799, ppl=6.96, wps=459606, ups=1.06, wpb=434620, bsz=16664.2, num_updates=9600, lr=0.000645497, gnorm=0.317, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=9392 +epoch 006: 1171 / 1689 loss=4.395, nll_loss=2.799, ppl=6.96, wps=459606, ups=1.06, wpb=434620, bsz=16664.2, num_updates=9600, lr=0.000645497, gnorm=0.317, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=9392 +epoch 006: 1271 / 1689 loss=4.386, nll_loss=2.789, ppl=6.91, wps=461418, ups=1.07, wpb=433171, bsz=16261.9, num_updates=9700, lr=0.000642161, gnorm=0.305, clip=0, loss_scale=2, train_wall=93, gb_free=17, wall=9486 +epoch 006: 1271 / 1689 loss=4.386, nll_loss=2.789, ppl=6.91, wps=461418, ups=1.07, wpb=433171, bsz=16261.9, num_updates=9700, lr=0.000642161, gnorm=0.305, clip=0, loss_scale=2, train_wall=93, gb_free=17, wall=9486 +epoch 006: 1271 / 1689 loss=4.386, nll_loss=2.789, ppl=6.91, wps=461418, ups=1.07, wpb=433171, bsz=16261.9, num_updates=9700, lr=0.000642161, gnorm=0.305, clip=0, loss_scale=2, train_wall=93, gb_free=17, wall=9486 +epoch 006: 1271 / 1689 loss=4.386, nll_loss=2.789, ppl=6.91, wps=461418, ups=1.07, wpb=433171, bsz=16261.9, num_updates=9700, lr=0.000642161, gnorm=0.305, clip=0, loss_scale=2, train_wall=93, gb_free=17, wall=9486 +epoch 006: 1271 / 1689 loss=4.386, nll_loss=2.789, ppl=6.91, wps=461418, ups=1.07, wpb=433171, bsz=16261.9, num_updates=9700, lr=0.000642161, gnorm=0.305, clip=0, loss_scale=2, train_wall=93, gb_free=17, wall=9486 +epoch 006: 1271 / 1689 loss=4.386, nll_loss=2.789, ppl=6.91, wps=461418, ups=1.07, wpb=433171, bsz=16261.9, num_updates=9700, lr=0.000642161, gnorm=0.305, clip=0, loss_scale=2, train_wall=93, gb_free=17, wall=9486 +epoch 006: 1371 / 1689 loss=4.395, nll_loss=2.8, ppl=6.96, wps=460336, ups=1.06, wpb=432832, bsz=16659, num_updates=9800, lr=0.000638877, gnorm=0.326, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=9580 +epoch 006: 1371 / 1689 loss=4.395, nll_loss=2.8, ppl=6.96, wps=460336, ups=1.06, wpb=432832, bsz=16659, num_updates=9800, lr=0.000638877, gnorm=0.326, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=9580 +epoch 006: 1371 / 1689 loss=4.395, nll_loss=2.8, ppl=6.96, wps=460336, ups=1.06, wpb=432832, bsz=16659, num_updates=9800, lr=0.000638877, gnorm=0.326, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=9580 +epoch 006: 1371 / 1689 loss=4.395, nll_loss=2.8, ppl=6.96, wps=460336, ups=1.06, wpb=432832, bsz=16659, num_updates=9800, lr=0.000638877, gnorm=0.326, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=9580 +epoch 006: 1371 / 1689 loss=4.395, nll_loss=2.8, ppl=6.96, wps=460336, ups=1.06, wpb=432832, bsz=16659, num_updates=9800, lr=0.000638877, gnorm=0.326, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=9580 +epoch 006: 1371 / 1689 loss=4.395, nll_loss=2.8, ppl=6.96, wps=460336, ups=1.06, wpb=432832, bsz=16659, num_updates=9800, lr=0.000638877, gnorm=0.326, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=9580 +epoch 006: 1471 / 1689 loss=4.397, nll_loss=2.802, ppl=6.97, wps=463376, ups=1.06, wpb=436466, bsz=16325.3, num_updates=9900, lr=0.000635642, gnorm=0.311, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=9674 +epoch 006: 1471 / 1689 loss=4.397, nll_loss=2.802, ppl=6.97, wps=463376, ups=1.06, wpb=436466, bsz=16325.3, num_updates=9900, lr=0.000635642, gnorm=0.311, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=9674 +epoch 006: 1471 / 1689 loss=4.397, nll_loss=2.802, ppl=6.97, wps=463376, ups=1.06, wpb=436466, bsz=16325.3, num_updates=9900, lr=0.000635642, gnorm=0.311, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=9674 +epoch 006: 1471 / 1689 loss=4.397, nll_loss=2.802, ppl=6.97, wps=463376, ups=1.06, wpb=436466, bsz=16325.3, num_updates=9900, lr=0.000635642, gnorm=0.311, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=9674 +epoch 006: 1471 / 1689 loss=4.397, nll_loss=2.802, ppl=6.97, wps=463376, ups=1.06, wpb=436466, bsz=16325.3, num_updates=9900, lr=0.000635642, gnorm=0.311, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=9674 +epoch 006: 1471 / 1689 loss=4.397, nll_loss=2.802, ppl=6.97, wps=463376, ups=1.06, wpb=436466, bsz=16325.3, num_updates=9900, lr=0.000635642, gnorm=0.311, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=9674 +epoch 006: 1571 / 1689 loss=4.393, nll_loss=2.797, ppl=6.95, wps=463079, ups=1.06, wpb=436013, bsz=16715.1, num_updates=10000, lr=0.000632456, gnorm=0.306, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=9768 +epoch 006: 1571 / 1689 loss=4.393, nll_loss=2.797, ppl=6.95, wps=463079, ups=1.06, wpb=436013, bsz=16715.1, num_updates=10000, lr=0.000632456, gnorm=0.306, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=9768 +epoch 006: 1571 / 1689 loss=4.393, nll_loss=2.797, ppl=6.95, wps=463079, ups=1.06, wpb=436013, bsz=16715.1, num_updates=10000, lr=0.000632456, gnorm=0.306, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=9768 +epoch 006: 1571 / 1689 loss=4.393, nll_loss=2.797, ppl=6.95, wps=463079, ups=1.06, wpb=436013, bsz=16715.1, num_updates=10000, lr=0.000632456, gnorm=0.306, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=9768 +epoch 006: 1571 / 1689 loss=4.393, nll_loss=2.797, ppl=6.95, wps=463079, ups=1.06, wpb=436013, bsz=16715.1, num_updates=10000, lr=0.000632456, gnorm=0.306, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=9768 +epoch 006: 1571 / 1689 loss=4.393, nll_loss=2.797, ppl=6.95, wps=463079, ups=1.06, wpb=436013, bsz=16715.1, num_updates=10000, lr=0.000632456, gnorm=0.306, clip=0, loss_scale=2, train_wall=93, gb_free=18.6, wall=9768 +begin validation on "valid" subset +epoch 006 | valid on 'valid' subset | loss 4.393 | nll_loss 2.762 | ppl 6.79 | wps 0 | wpb 42662 | bsz 2032 | num_updates 10000 | best_loss 4.393 +epoch 006 | valid on 'valid' subset | loss 4.393 | nll_loss 2.762 | ppl 6.79 | wps 0 | wpb 42662 | bsz 2032 | num_updates 10000 | best_loss 4.393 +epoch 006 | valid on 'valid' subset | loss 4.393 | nll_loss 2.762 | ppl 6.79 | wps 0 | wpb 42662 | bsz 2032 | num_updates 10000 | best_loss 4.393 +epoch 006 | valid on 'valid' subset | loss 4.393 | nll_loss 2.762 | ppl 6.79 | wps 0 | wpb 42662 | bsz 2032 | num_updates 10000 | best_loss 4.393 +epoch 006 | valid on 'valid' subset | loss 4.393 | nll_loss 2.762 | ppl 6.79 | wps 0 | wpb 42662 | bsz 2032 | num_updates 10000 | best_loss 4.393 +epoch 006 | valid on 'valid' subset | loss 4.393 | nll_loss 2.762 | ppl 6.79 | wps 0 | wpb 42662 | bsz 2032 | num_updates 10000 | best_loss 4.393 +epoch 006: 1671 / 1689 loss=4.373, nll_loss=2.775, ppl=6.85, wps=297958, ups=0.69, wpb=432010, bsz=16585.9, num_updates=10100, lr=0.000629317, gnorm=0.316, clip=0, loss_scale=4, train_wall=119, gb_free=20.5, wall=9913 +epoch 006: 1671 / 1689 loss=4.373, nll_loss=2.775, ppl=6.85, wps=297958, ups=0.69, wpb=432010, bsz=16585.9, num_updates=10100, lr=0.000629317, gnorm=0.316, clip=0, loss_scale=4, train_wall=119, gb_free=20.5, wall=9913 +epoch 006: 1671 / 1689 loss=4.373, nll_loss=2.775, ppl=6.85, wps=297958, ups=0.69, wpb=432010, bsz=16585.9, num_updates=10100, lr=0.000629317, gnorm=0.316, clip=0, loss_scale=4, train_wall=119, gb_free=20.5, wall=9913 +epoch 006: 1671 / 1689 loss=4.373, nll_loss=2.775, ppl=6.85, wps=297958, ups=0.69, wpb=432010, bsz=16585.9, num_updates=10100, lr=0.000629317, gnorm=0.316, clip=0, loss_scale=4, train_wall=119, gb_free=20.5, wall=9913 +epoch 006: 1671 / 1689 loss=4.373, nll_loss=2.775, ppl=6.85, wps=297958, ups=0.69, wpb=432010, bsz=16585.9, num_updates=10100, lr=0.000629317, gnorm=0.316, clip=0, loss_scale=4, train_wall=119, gb_free=20.5, wall=9913 +epoch 006: 1671 / 1689 loss=4.373, nll_loss=2.775, ppl=6.85, wps=297958, ups=0.69, wpb=432010, bsz=16585.9, num_updates=10100, lr=0.000629317, gnorm=0.316, clip=0, loss_scale=4, train_wall=119, gb_free=20.5, wall=9913 +end of epoch 6 (average epoch stats below) +epoch 006 | loss 4.397 | nll_loss 2.801 | ppl 6.97 | wps 439174 | ups 1.01 | wpb 433525 | bsz 16504.9 | num_updates 10118 | lr 0.000628757 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1598 | gb_free 22.5 | wall 9929 +epoch 006 | loss 4.397 | nll_loss 2.801 | ppl 6.97 | wps 439174 | ups 1.01 | wpb 433525 | bsz 16504.9 | num_updates 10118 | lr 0.000628757 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1598 | gb_free 22.5 | wall 9929 +epoch 006 | loss 4.397 | nll_loss 2.801 | ppl 6.97 | wps 439174 | ups 1.01 | wpb 433525 | bsz 16504.9 | num_updates 10118 | lr 0.000628757 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1598 | gb_free 22.5 | wall 9929 +epoch 006 | loss 4.397 | nll_loss 2.801 | ppl 6.97 | wps 439174 | ups 1.01 | wpb 433525 | bsz 16504.9 | num_updates 10118 | lr 0.000628757 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1598 | gb_free 22.5 | wall 9929 +epoch 006 | loss 4.397 | nll_loss 2.801 | ppl 6.97 | wps 439174 | ups 1.01 | wpb 433525 | bsz 16504.9 | num_updates 10118 | lr 0.000628757 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1598 | gb_free 22.5 | wall 9929 +epoch 006 | loss 4.397 | nll_loss 2.801 | ppl 6.97 | wps 439174 | ups 1.01 | wpb 433525 | bsz 16504.9 | num_updates 10118 | lr 0.000628757 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1598 | gb_free 22.5 | wall 9929 +Start iterating over samples +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 82 / 1689 loss=4.352, nll_loss=2.751, ppl=6.73, wps=452863, ups=1.05, wpb=429630, bsz=16373.4, num_updates=10200, lr=0.000626224, gnorm=0.312, clip=0, loss_scale=4, train_wall=92, gb_free=19.2, wall=10008 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 182 / 1689 loss=4.341, nll_loss=2.738, ppl=6.67, wps=463828, ups=1.07, wpb=433715, bsz=16554.6, num_updates=10300, lr=0.000623177, gnorm=0.309, clip=0, loss_scale=4, train_wall=92, gb_free=19.6, wall=10102 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 283 / 1689 loss=4.347, nll_loss=2.744, ppl=6.7, wps=456786, ups=1.05, wpb=434300, bsz=16520.3, num_updates=10400, lr=0.000620174, gnorm=0.29, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=10197 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 383 / 1689 loss=4.358, nll_loss=2.758, ppl=6.76, wps=466600, ups=1.07, wpb=435001, bsz=16086.7, num_updates=10500, lr=0.000617213, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=10290 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 483 / 1689 loss=4.348, nll_loss=2.747, ppl=6.71, wps=462863, ups=1.07, wpb=433476, bsz=16578.7, num_updates=10600, lr=0.000614295, gnorm=0.298, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=10384 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 583 / 1689 loss=4.343, nll_loss=2.741, ppl=6.68, wps=457378, ups=1.06, wpb=431803, bsz=16425, num_updates=10700, lr=0.000611418, gnorm=0.313, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=10478 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 683 / 1689 loss=4.346, nll_loss=2.744, ppl=6.7, wps=461716, ups=1.06, wpb=434036, bsz=16361.3, num_updates=10800, lr=0.000608581, gnorm=0.293, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=10572 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 784 / 1689 loss=4.361, nll_loss=2.761, ppl=6.78, wps=458655, ups=1.06, wpb=433462, bsz=16341.1, num_updates=10900, lr=0.000605783, gnorm=0.298, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=10666 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +epoch 007: 884 / 1689 loss=4.36, nll_loss=2.76, ppl=6.78, wps=461815, ups=1.07, wpb=433106, bsz=16498.1, num_updates=11000, lr=0.000603023, gnorm=0.295, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10760 +begin validation on "valid" subset +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007 | valid on 'valid' subset | loss 4.362 | nll_loss 2.73 | ppl 6.63 | wps 0 | wpb 42662 | bsz 2032 | num_updates 11000 | best_loss 4.362 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 984 / 1689 loss=4.344, nll_loss=2.743, ppl=6.69, wps=386759, ups=0.89, wpb=434160, bsz=16326.5, num_updates=11100, lr=0.0006003, gnorm=0.318, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=10873 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1084 / 1689 loss=4.339, nll_loss=2.737, ppl=6.67, wps=460722, ups=1.07, wpb=432249, bsz=16991.3, num_updates=11200, lr=0.000597614, gnorm=0.31, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=10966 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1184 / 1689 loss=4.348, nll_loss=2.748, ppl=6.72, wps=465166, ups=1.07, wpb=435140, bsz=16705.6, num_updates=11300, lr=0.000594964, gnorm=0.282, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=11060 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1285 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=456822, ups=1.05, wpb=435719, bsz=16386.1, num_updates=11400, lr=0.000592349, gnorm=0.295, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.9, wall=11155 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1385 / 1689 loss=4.358, nll_loss=2.759, ppl=6.77, wps=460938, ups=1.06, wpb=433577, bsz=16396.2, num_updates=11500, lr=0.000589768, gnorm=0.285, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.3, wall=11249 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1485 / 1689 loss=4.354, nll_loss=2.755, ppl=6.75, wps=460585, ups=1.06, wpb=434879, bsz=16616.1, num_updates=11600, lr=0.00058722, gnorm=0.282, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.1, wall=11344 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1585 / 1689 loss=4.347, nll_loss=2.747, ppl=6.71, wps=459565, ups=1.06, wpb=433746, bsz=16915.8, num_updates=11700, lr=0.000584705, gnorm=0.302, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=11438 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +epoch 007: 1685 / 1689 loss=4.341, nll_loss=2.741, ppl=6.68, wps=457499, ups=1.06, wpb=431976, bsz=16493.3, num_updates=11800, lr=0.000582223, gnorm=0.283, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=11533 +end of epoch 7 (average epoch stats below) +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +epoch 007 | loss 4.348 | nll_loss 2.748 | ppl 6.72 | wps 455080 | ups 1.05 | wpb 433531 | bsz 16503 | num_updates 11804 | lr 0.000582124 | gnorm 0.297 | clip 0 | loss_scale 0.5 | train_wall 1561 | gb_free 19.6 | wall 11536 +Start iterating over samples +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 96 / 1689 loss=4.296, nll_loss=2.688, ppl=6.45, wps=452408, ups=1.05, wpb=430592, bsz=16555.9, num_updates=11900, lr=0.000579771, gnorm=0.3, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=11628 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +epoch 008: 196 / 1689 loss=4.312, nll_loss=2.706, ppl=6.53, wps=465225, ups=1.07, wpb=434281, bsz=16559, num_updates=12000, lr=0.00057735, gnorm=0.27, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11721 +begin validation on "valid" subset +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008 | valid on 'valid' subset | loss 4.341 | nll_loss 2.708 | ppl 6.53 | wps 0 | wpb 42662 | bsz 2032 | num_updates 12000 | best_loss 4.341 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 296 / 1689 loss=4.315, nll_loss=2.71, ppl=6.55, wps=380366, ups=0.88, wpb=434536, bsz=16604.2, num_updates=12100, lr=0.00057496, gnorm=0.293, clip=0, loss_scale=1, train_wall=93, gb_free=21.7, wall=11835 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 396 / 1689 loss=4.323, nll_loss=2.719, ppl=6.58, wps=466986, ups=1.08, wpb=433774, bsz=16639.4, num_updates=12200, lr=0.000572598, gnorm=0.29, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=11928 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 496 / 1689 loss=4.3, nll_loss=2.694, ppl=6.47, wps=464212, ups=1.08, wpb=430886, bsz=16052.7, num_updates=12300, lr=0.000570266, gnorm=0.298, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=12021 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 596 / 1689 loss=4.316, nll_loss=2.711, ppl=6.55, wps=463305, ups=1.08, wpb=430445, bsz=16397, num_updates=12400, lr=0.000567962, gnorm=0.293, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=12114 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 696 / 1689 loss=4.303, nll_loss=2.697, ppl=6.48, wps=459502, ups=1.06, wpb=433254, bsz=16599, num_updates=12500, lr=0.000565685, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=12208 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 796 / 1689 loss=4.311, nll_loss=2.707, ppl=6.53, wps=460826, ups=1.06, wpb=434251, bsz=16372.3, num_updates=12600, lr=0.000563436, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=12302 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 896 / 1689 loss=4.325, nll_loss=2.723, ppl=6.6, wps=464149, ups=1.07, wpb=434970, bsz=16585.5, num_updates=12700, lr=0.000561214, gnorm=0.297, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=12396 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 996 / 1689 loss=4.327, nll_loss=2.725, ppl=6.61, wps=464758, ups=1.06, wpb=436642, bsz=16375.6, num_updates=12800, lr=0.000559017, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=12490 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1096 / 1689 loss=4.309, nll_loss=2.705, ppl=6.52, wps=460938, ups=1.06, wpb=434215, bsz=16411.8, num_updates=12900, lr=0.000556846, gnorm=0.288, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=12584 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +epoch 008: 1196 / 1689 loss=4.306, nll_loss=2.702, ppl=6.51, wps=462515, ups=1.07, wpb=434089, bsz=16281, num_updates=13000, lr=0.0005547, gnorm=0.264, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=12678 +begin validation on "valid" subset +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008 | valid on 'valid' subset | loss 4.335 | nll_loss 2.704 | ppl 6.51 | wps 0 | wpb 42662 | bsz 2032 | num_updates 13000 | best_loss 4.335 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1297 / 1689 loss=4.316, nll_loss=2.713, ppl=6.56, wps=122689, ups=0.28, wpb=434602, bsz=16713.4, num_updates=13100, lr=0.000552579, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13032 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1397 / 1689 loss=4.312, nll_loss=2.709, ppl=6.54, wps=467114, ups=1.08, wpb=432662, bsz=16710.8, num_updates=13200, lr=0.000550482, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=13125 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1497 / 1689 loss=4.305, nll_loss=2.701, ppl=6.5, wps=468197, ups=1.08, wpb=434190, bsz=16438.8, num_updates=13300, lr=0.000548408, gnorm=0.284, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=13218 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +epoch 008: 1597 / 1689 loss=4.306, nll_loss=2.703, ppl=6.51, wps=466136, ups=1.08, wpb=431910, bsz=16620.6, num_updates=13400, lr=0.000546358, gnorm=0.295, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=13310 +end of epoch 8 (average epoch stats below) +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +epoch 008 | loss 4.311 | nll_loss 2.707 | ppl 6.53 | wps 393426 | ups 0.91 | wpb 433520 | bsz 16503 | num_updates 13492 | lr 0.000544492 | gnorm 0.284 | clip 0 | loss_scale 2 | train_wall 1561 | gb_free 20.2 | wall 13396 +Start iterating over samples +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 8 / 1689 loss=4.303, nll_loss=2.699, ppl=6.49, wps=462644, ups=1.07, wpb=431211, bsz=16495.1, num_updates=13500, lr=0.000544331, gnorm=0.286, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=13404 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 108 / 1689 loss=4.263, nll_loss=2.652, ppl=6.29, wps=464636, ups=1.08, wpb=431511, bsz=16519.9, num_updates=13600, lr=0.000542326, gnorm=0.272, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=13497 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 209 / 1689 loss=4.278, nll_loss=2.67, ppl=6.36, wps=457910, ups=1.06, wpb=433711, bsz=16694.1, num_updates=13700, lr=0.000540343, gnorm=0.284, clip=0, loss_scale=2, train_wall=94, gb_free=18.9, wall=13591 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 309 / 1689 loss=4.286, nll_loss=2.679, ppl=6.4, wps=460025, ups=1.06, wpb=435112, bsz=16472.7, num_updates=13800, lr=0.000538382, gnorm=0.281, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=13686 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 409 / 1689 loss=4.277, nll_loss=2.669, ppl=6.36, wps=461234, ups=1.06, wpb=435210, bsz=16420.2, num_updates=13900, lr=0.000536442, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=13780 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +epoch 009: 509 / 1689 loss=4.28, nll_loss=2.672, ppl=6.37, wps=455847, ups=1.05, wpb=432631, bsz=16527.3, num_updates=14000, lr=0.000534522, gnorm=0.28, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=13875 +begin validation on "valid" subset +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009 | valid on 'valid' subset | loss 4.318 | nll_loss 2.687 | ppl 6.44 | wps 0 | wpb 42662 | bsz 2032 | num_updates 14000 | best_loss 4.318 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 609 / 1689 loss=4.288, nll_loss=2.681, ppl=6.41, wps=273615, ups=0.63, wpb=433733, bsz=16229.4, num_updates=14100, lr=0.000532624, gnorm=0.28, clip=0, loss_scale=2, train_wall=130, gb_free=19.1, wall=14034 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 709 / 1689 loss=4.295, nll_loss=2.689, ppl=6.45, wps=460157, ups=1.06, wpb=435588, bsz=16483, num_updates=14200, lr=0.000530745, gnorm=0.271, clip=0, loss_scale=4, train_wall=93, gb_free=19.4, wall=14128 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 810 / 1689 loss=4.279, nll_loss=2.671, ppl=6.37, wps=453307, ups=1.05, wpb=432724, bsz=16647, num_updates=14300, lr=0.000528886, gnorm=0.276, clip=0, loss_scale=2, train_wall=94, gb_free=19.2, wall=14224 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 910 / 1689 loss=4.28, nll_loss=2.673, ppl=6.38, wps=459012, ups=1.06, wpb=433743, bsz=16661.7, num_updates=14400, lr=0.000527046, gnorm=0.277, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14318 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1010 / 1689 loss=4.271, nll_loss=2.663, ppl=6.33, wps=461331, ups=1.07, wpb=432747, bsz=16322.3, num_updates=14500, lr=0.000525226, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=14412 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1110 / 1689 loss=4.289, nll_loss=2.683, ppl=6.42, wps=460340, ups=1.06, wpb=434886, bsz=16668.3, num_updates=14600, lr=0.000523424, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=14507 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1210 / 1689 loss=4.272, nll_loss=2.665, ppl=6.34, wps=459436, ups=1.06, wpb=432430, bsz=16385.2, num_updates=14700, lr=0.000521641, gnorm=0.271, clip=0, loss_scale=2, train_wall=93, gb_free=19.2, wall=14601 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1310 / 1689 loss=4.279, nll_loss=2.672, ppl=6.37, wps=457106, ups=1.06, wpb=431825, bsz=16283.9, num_updates=14800, lr=0.000519875, gnorm=0.268, clip=0, loss_scale=4, train_wall=93, gb_free=21.2, wall=14695 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1410 / 1689 loss=4.284, nll_loss=2.678, ppl=6.4, wps=459413, ups=1.06, wpb=433866, bsz=16604.4, num_updates=14900, lr=0.000518128, gnorm=0.269, clip=0, loss_scale=4, train_wall=93, gb_free=19.2, wall=14790 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +epoch 009: 1511 / 1689 loss=4.275, nll_loss=2.668, ppl=6.35, wps=454183, ups=1.05, wpb=433458, bsz=16546.3, num_updates=15000, lr=0.000516398, gnorm=0.263, clip=0, loss_scale=2, train_wall=94, gb_free=19.4, wall=14885 +begin validation on "valid" subset +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009 | valid on 'valid' subset | loss 4.306 | nll_loss 2.671 | ppl 6.37 | wps 0 | wpb 42662 | bsz 2032 | num_updates 15000 | best_loss 4.306 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +epoch 009: 1611 / 1689 loss=4.297, nll_loss=2.693, ppl=6.47, wps=380641, ups=0.88, wpb=434922, bsz=16618.9, num_updates=15100, lr=0.000514685, gnorm=0.272, clip=0, loss_scale=2, train_wall=93, gb_free=21.6, wall=14999 +end of epoch 9 (average epoch stats below) +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +epoch 009 | loss 4.281 | nll_loss 2.674 | ppl 6.38 | wps 436020 | ups 1.01 | wpb 433527 | bsz 16504.5 | num_updates 15178 | lr 0.000513361 | gnorm 0.274 | clip 0 | loss_scale 2 | train_wall 1604 | gb_free 20.3 | wall 15072 +Start iterating over samples +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 22 / 1689 loss=4.288, nll_loss=2.683, ppl=6.42, wps=457714, ups=1.06, wpb=431292, bsz=16655.2, num_updates=15200, lr=0.000512989, gnorm=0.29, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=15094 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 122 / 1689 loss=4.253, nll_loss=2.642, ppl=6.24, wps=458848, ups=1.06, wpb=434455, bsz=16475.3, num_updates=15300, lr=0.00051131, gnorm=0.265, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15188 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 222 / 1689 loss=4.244, nll_loss=2.632, ppl=6.2, wps=462807, ups=1.07, wpb=433180, bsz=16630.6, num_updates=15400, lr=0.000509647, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=16.7, wall=15282 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 322 / 1689 loss=4.251, nll_loss=2.64, ppl=6.23, wps=461845, ups=1.06, wpb=433834, bsz=16467.8, num_updates=15500, lr=0.000508001, gnorm=0.273, clip=0, loss_scale=4, train_wall=92, gb_free=18.1, wall=15376 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 423 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=458799, ups=1.05, wpb=434905, bsz=16602.9, num_updates=15600, lr=0.00050637, gnorm=0.273, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=15471 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 523 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=457786, ups=1.06, wpb=432070, bsz=16322.5, num_updates=15700, lr=0.000504754, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15565 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 623 / 1689 loss=4.258, nll_loss=2.649, ppl=6.27, wps=458998, ups=1.06, wpb=432533, bsz=16819.3, num_updates=15800, lr=0.000503155, gnorm=0.283, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=15659 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 723 / 1689 loss=4.252, nll_loss=2.642, ppl=6.24, wps=455312, ups=1.05, wpb=433189, bsz=16460.6, num_updates=15900, lr=0.00050157, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=15754 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +epoch 010: 823 / 1689 loss=4.253, nll_loss=2.643, ppl=6.25, wps=458213, ups=1.05, wpb=435391, bsz=16382.6, num_updates=16000, lr=0.0005, gnorm=0.255, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=15849 +begin validation on "valid" subset +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010 | valid on 'valid' subset | loss 4.317 | nll_loss 2.681 | ppl 6.41 | wps 0 | wpb 42662 | bsz 2032 | num_updates 16000 | best_loss 4.306 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 924 / 1689 loss=4.263, nll_loss=2.654, ppl=6.3, wps=406031, ups=0.93, wpb=435595, bsz=16738.8, num_updates=16100, lr=0.000498445, gnorm=0.255, clip=0, loss_scale=2, train_wall=94, gb_free=19.5, wall=15957 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1024 / 1689 loss=4.254, nll_loss=2.644, ppl=6.25, wps=463025, ups=1.07, wpb=433782, bsz=16639.6, num_updates=16200, lr=0.000496904, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=16050 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1124 / 1689 loss=4.252, nll_loss=2.643, ppl=6.24, wps=459662, ups=1.06, wpb=432004, bsz=16481.1, num_updates=16300, lr=0.000495377, gnorm=0.255, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=16144 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1224 / 1689 loss=4.262, nll_loss=2.653, ppl=6.29, wps=465776, ups=1.07, wpb=433409, bsz=16475.8, num_updates=16400, lr=0.000493865, gnorm=0.26, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=16237 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1324 / 1689 loss=4.249, nll_loss=2.639, ppl=6.23, wps=459748, ups=1.06, wpb=433318, bsz=16658.9, num_updates=16500, lr=0.000492366, gnorm=0.274, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=16332 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1424 / 1689 loss=4.274, nll_loss=2.668, ppl=6.36, wps=464950, ups=1.07, wpb=434366, bsz=16583, num_updates=16600, lr=0.000490881, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=16425 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1525 / 1689 loss=4.262, nll_loss=2.654, ppl=6.29, wps=457987, ups=1.06, wpb=432754, bsz=16343, num_updates=16700, lr=0.000489409, gnorm=0.263, clip=0, loss_scale=2, train_wall=93, gb_free=20.4, wall=16520 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +epoch 010: 1625 / 1689 loss=4.266, nll_loss=2.659, ppl=6.31, wps=465203, ups=1.07, wpb=434818, bsz=16334, num_updates=16800, lr=0.00048795, gnorm=0.276, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=16613 +end of epoch 10 (average epoch stats below) +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +epoch 010 | loss 4.256 | nll_loss 2.647 | ppl 6.26 | wps 456776 | ups 1.05 | wpb 433550 | bsz 16505.1 | num_updates 16864 | lr 0.000487023 | gnorm 0.266 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 21.2 | wall 16672 +Start iterating over samples +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 36 / 1689 loss=4.25, nll_loss=2.64, ppl=6.23, wps=459329, ups=1.06, wpb=432584, bsz=15999.6, num_updates=16900, lr=0.000486504, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=16707 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +epoch 011: 136 / 1689 loss=4.217, nll_loss=2.602, ppl=6.07, wps=463830, ups=1.07, wpb=433997, bsz=16731.8, num_updates=17000, lr=0.000485071, gnorm=0.261, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=16801 +begin validation on "valid" subset +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011 | valid on 'valid' subset | loss 4.299 | nll_loss 2.668 | ppl 6.35 | wps 0 | wpb 42662 | bsz 2032 | num_updates 17000 | best_loss 4.299 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 236 / 1689 loss=4.222, nll_loss=2.608, ppl=6.1, wps=386924, ups=0.9, wpb=430910, bsz=16515.2, num_updates=17100, lr=0.000483651, gnorm=0.259, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=16912 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 337 / 1689 loss=4.228, nll_loss=2.615, ppl=6.13, wps=459438, ups=1.06, wpb=433248, bsz=16956.5, num_updates=17200, lr=0.000482243, gnorm=0.245, clip=0, loss_scale=1, train_wall=94, gb_free=16.2, wall=17006 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 437 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463527, ups=1.07, wpb=434739, bsz=16287.1, num_updates=17300, lr=0.000480847, gnorm=0.27, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17100 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 537 / 1689 loss=4.239, nll_loss=2.628, ppl=6.18, wps=466181, ups=1.07, wpb=436058, bsz=16601.4, num_updates=17400, lr=0.000479463, gnorm=0.262, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=17194 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 637 / 1689 loss=4.237, nll_loss=2.625, ppl=6.17, wps=463696, ups=1.06, wpb=435653, bsz=16275, num_updates=17500, lr=0.000478091, gnorm=0.251, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=17288 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 737 / 1689 loss=4.244, nll_loss=2.634, ppl=6.21, wps=461114, ups=1.06, wpb=433280, bsz=16474.8, num_updates=17600, lr=0.000476731, gnorm=0.254, clip=0, loss_scale=1, train_wall=93, gb_free=21.1, wall=17382 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 837 / 1689 loss=4.236, nll_loss=2.624, ppl=6.17, wps=460078, ups=1.06, wpb=435145, bsz=16648.4, num_updates=17700, lr=0.000475383, gnorm=0.245, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=17476 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 938 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=455540, ups=1.06, wpb=430358, bsz=16393, num_updates=17800, lr=0.000474045, gnorm=0.264, clip=0, loss_scale=1, train_wall=93, gb_free=17.6, wall=17571 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1038 / 1689 loss=4.25, nll_loss=2.64, ppl=6.24, wps=461972, ups=1.06, wpb=434782, bsz=16755.6, num_updates=17900, lr=0.000472719, gnorm=0.263, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=17665 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +epoch 011: 1138 / 1689 loss=4.235, nll_loss=2.623, ppl=6.16, wps=459608, ups=1.06, wpb=433312, bsz=16564.4, num_updates=18000, lr=0.000471405, gnorm=0.25, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=17759 +begin validation on "valid" subset +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011 | valid on 'valid' subset | loss 4.291 | nll_loss 2.658 | ppl 6.31 | wps 0 | wpb 42662 | bsz 2032 | num_updates 18000 | best_loss 4.291 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1238 / 1689 loss=4.229, nll_loss=2.617, ppl=6.13, wps=382834, ups=0.89, wpb=432515, bsz=16769.6, num_updates=18100, lr=0.0004701, gnorm=0.254, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=17872 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1338 / 1689 loss=4.233, nll_loss=2.622, ppl=6.16, wps=465179, ups=1.07, wpb=433771, bsz=16456.4, num_updates=18200, lr=0.000468807, gnorm=0.266, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=17965 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1438 / 1689 loss=4.243, nll_loss=2.633, ppl=6.2, wps=460954, ups=1.06, wpb=433280, bsz=16105.6, num_updates=18300, lr=0.000467525, gnorm=0.264, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=18059 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1538 / 1689 loss=4.242, nll_loss=2.632, ppl=6.2, wps=464073, ups=1.07, wpb=435024, bsz=16113, num_updates=18400, lr=0.000466252, gnorm=0.257, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=18153 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +epoch 011: 1638 / 1689 loss=4.234, nll_loss=2.624, ppl=6.16, wps=461137, ups=1.07, wpb=431309, bsz=16443.3, num_updates=18500, lr=0.000464991, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=18247 +end of epoch 11 (average epoch stats below) +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +epoch 011 | loss 4.235 | nll_loss 2.624 | ppl 6.16 | wps 450883 | ups 1.04 | wpb 433513 | bsz 16505.6 | num_updates 18551 | lr 0.000464351 | gnorm 0.257 | clip 0 | loss_scale 2 | train_wall 1562 | gb_free 20.8 | wall 18294 +Start iterating over samples +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 50 / 1689 loss=4.223, nll_loss=2.61, ppl=6.1, wps=449095, ups=1.05, wpb=429523, bsz=16321.6, num_updates=18600, lr=0.000463739, gnorm=0.254, clip=0, loss_scale=1, train_wall=94, gb_free=17.1, wall=18342 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 150 / 1689 loss=4.2, nll_loss=2.583, ppl=5.99, wps=460796, ups=1.06, wpb=432691, bsz=16216.4, num_updates=18700, lr=0.000462497, gnorm=0.252, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=18436 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 250 / 1689 loss=4.206, nll_loss=2.59, ppl=6.02, wps=462087, ups=1.07, wpb=433048, bsz=16802.8, num_updates=18800, lr=0.000461266, gnorm=0.263, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=18530 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 350 / 1689 loss=4.214, nll_loss=2.599, ppl=6.06, wps=461885, ups=1.06, wpb=433950, bsz=16261.5, num_updates=18900, lr=0.000460044, gnorm=0.248, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=18624 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +epoch 012: 450 / 1689 loss=4.215, nll_loss=2.601, ppl=6.07, wps=459916, ups=1.06, wpb=434033, bsz=16341.3, num_updates=19000, lr=0.000458831, gnorm=0.255, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=18718 +begin validation on "valid" subset +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012 | valid on 'valid' subset | loss 4.294 | nll_loss 2.662 | ppl 6.33 | wps 0 | wpb 42662 | bsz 2032 | num_updates 19000 | best_loss 4.291 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 550 / 1689 loss=4.211, nll_loss=2.596, ppl=6.05, wps=405353, ups=0.94, wpb=431222, bsz=16592.8, num_updates=19100, lr=0.000457629, gnorm=0.246, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=18825 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 650 / 1689 loss=4.223, nll_loss=2.61, ppl=6.11, wps=465224, ups=1.07, wpb=433995, bsz=16633.2, num_updates=19200, lr=0.000456435, gnorm=0.249, clip=0, loss_scale=2, train_wall=92, gb_free=20.4, wall=18918 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 750 / 1689 loss=4.218, nll_loss=2.604, ppl=6.08, wps=463836, ups=1.07, wpb=433112, bsz=16662.6, num_updates=19300, lr=0.000455251, gnorm=0.262, clip=0, loss_scale=2, train_wall=92, gb_free=20.9, wall=19011 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 850 / 1689 loss=4.216, nll_loss=2.603, ppl=6.07, wps=462865, ups=1.07, wpb=433560, bsz=16796.9, num_updates=19400, lr=0.000454077, gnorm=0.265, clip=0, loss_scale=2, train_wall=92, gb_free=20.5, wall=19105 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 950 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=464150, ups=1.06, wpb=437394, bsz=16614.9, num_updates=19500, lr=0.000452911, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=19199 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1050 / 1689 loss=4.214, nll_loss=2.6, ppl=6.06, wps=465240, ups=1.07, wpb=433369, bsz=16096.4, num_updates=19600, lr=0.000451754, gnorm=0.234, clip=0, loss_scale=4, train_wall=92, gb_free=20.1, wall=19292 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1151 / 1689 loss=4.221, nll_loss=2.609, ppl=6.1, wps=460584, ups=1.06, wpb=433878, bsz=16565.1, num_updates=19700, lr=0.000450606, gnorm=0.258, clip=0, loss_scale=2, train_wall=92, gb_free=20.6, wall=19387 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1251 / 1689 loss=4.226, nll_loss=2.615, ppl=6.12, wps=463244, ups=1.07, wpb=434289, bsz=16583.7, num_updates=19800, lr=0.000449467, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19480 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1351 / 1689 loss=4.227, nll_loss=2.616, ppl=6.13, wps=462455, ups=1.06, wpb=435194, bsz=16958.4, num_updates=19900, lr=0.000448336, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19574 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +epoch 012: 1451 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=464028, ups=1.07, wpb=435025, bsz=16256.2, num_updates=20000, lr=0.000447214, gnorm=0.254, clip=0, loss_scale=2, train_wall=92, gb_free=18.6, wall=19668 +begin validation on "valid" subset +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012 | valid on 'valid' subset | loss 4.288 | nll_loss 2.655 | ppl 6.3 | wps 0 | wpb 42662 | bsz 2032 | num_updates 20000 | best_loss 4.288 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1551 / 1689 loss=4.23, nll_loss=2.619, ppl=6.14, wps=340787, ups=0.78, wpb=434305, bsz=16463.3, num_updates=20100, lr=0.0004461, gnorm=0.239, clip=0, loss_scale=2, train_wall=102, gb_free=18.9, wall=19796 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +epoch 012: 1652 / 1689 loss=4.226, nll_loss=2.615, ppl=6.13, wps=453763, ups=1.05, wpb=433137, bsz=16618.1, num_updates=20200, lr=0.000444994, gnorm=0.238, clip=0, loss_scale=2, train_wall=94, gb_free=19.3, wall=19891 +end of epoch 12 (average epoch stats below) +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +epoch 012 | loss 4.217 | nll_loss 2.604 | ppl 6.08 | wps 448233 | ups 1.03 | wpb 433538 | bsz 16506.4 | num_updates 20237 | lr 0.000444587 | gnorm 0.251 | clip 0 | loss_scale 2 | train_wall 1566 | gb_free 20.7 | wall 19925 +Start iterating over samples +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 63 / 1689 loss=4.194, nll_loss=2.578, ppl=5.97, wps=456614, ups=1.06, wpb=429616, bsz=16160.7, num_updates=20300, lr=0.000443897, gnorm=0.261, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=19985 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 163 / 1689 loss=4.185, nll_loss=2.566, ppl=5.92, wps=466754, ups=1.08, wpb=434156, bsz=16381, num_updates=20400, lr=0.000442807, gnorm=0.237, clip=0, loss_scale=2, train_wall=91, gb_free=19.8, wall=20078 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 263 / 1689 loss=4.195, nll_loss=2.579, ppl=5.97, wps=460097, ups=1.06, wpb=435142, bsz=16400.1, num_updates=20500, lr=0.000441726, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=20173 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 363 / 1689 loss=4.186, nll_loss=2.568, ppl=5.93, wps=463900, ups=1.07, wpb=433304, bsz=16735.8, num_updates=20600, lr=0.000440653, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=19.9, wall=20266 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 463 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=459106, ups=1.06, wpb=432542, bsz=16636.8, num_updates=20700, lr=0.000439587, gnorm=0.251, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=20360 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 563 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=461162, ups=1.07, wpb=432892, bsz=16120.8, num_updates=20800, lr=0.000438529, gnorm=0.246, clip=0, loss_scale=4, train_wall=92, gb_free=19, wall=20454 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 664 / 1689 loss=4.207, nll_loss=2.593, ppl=6.03, wps=457845, ups=1.05, wpb=435398, bsz=17062.8, num_updates=20900, lr=0.000437479, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=20549 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +epoch 013: 764 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=467787, ups=1.08, wpb=433113, bsz=16506.8, num_updates=21000, lr=0.000436436, gnorm=0.245, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=20642 +begin validation on "valid" subset +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013 | valid on 'valid' subset | loss 4.286 | nll_loss 2.652 | ppl 6.29 | wps 0 | wpb 42662 | bsz 2032 | num_updates 21000 | best_loss 4.286 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 864 / 1689 loss=4.209, nll_loss=2.595, ppl=6.04, wps=128567, ups=0.3, wpb=435288, bsz=16645.4, num_updates=21100, lr=0.0004354, gnorm=0.246, clip=0, loss_scale=2, train_wall=218, gb_free=20.2, wall=20980 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 964 / 1689 loss=4.204, nll_loss=2.59, ppl=6.02, wps=469456, ups=1.09, wpb=432090, bsz=16414.8, num_updates=21200, lr=0.000434372, gnorm=0.244, clip=0, loss_scale=2, train_wall=91, gb_free=19.3, wall=21073 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1064 / 1689 loss=4.211, nll_loss=2.597, ppl=6.05, wps=469438, ups=1.08, wpb=435246, bsz=16455.9, num_updates=21300, lr=0.000433351, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.5, wall=21165 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1165 / 1689 loss=4.199, nll_loss=2.584, ppl=6, wps=462622, ups=1.07, wpb=433986, bsz=16440.6, num_updates=21400, lr=0.000432338, gnorm=0.253, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21259 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1265 / 1689 loss=4.21, nll_loss=2.596, ppl=6.05, wps=467813, ups=1.08, wpb=433638, bsz=16325.1, num_updates=21500, lr=0.000431331, gnorm=0.266, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=21352 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1365 / 1689 loss=4.215, nll_loss=2.603, ppl=6.07, wps=464272, ups=1.07, wpb=433365, bsz=16795.4, num_updates=21600, lr=0.000430331, gnorm=0.243, clip=0, loss_scale=2, train_wall=92, gb_free=19.7, wall=21445 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1465 / 1689 loss=4.206, nll_loss=2.592, ppl=6.03, wps=458749, ups=1.06, wpb=432879, bsz=16563.2, num_updates=21700, lr=0.000429339, gnorm=0.233, clip=0, loss_scale=2, train_wall=93, gb_free=20, wall=21539 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1565 / 1689 loss=4.211, nll_loss=2.598, ppl=6.05, wps=460471, ups=1.06, wpb=432960, bsz=16668.6, num_updates=21800, lr=0.000428353, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=21633 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +epoch 013: 1665 / 1689 loss=4.203, nll_loss=2.588, ppl=6.01, wps=460939, ups=1.06, wpb=432996, bsz=16272.1, num_updates=21900, lr=0.000427374, gnorm=0.25, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=21727 +end of epoch 13 (average epoch stats below) +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +epoch 013 | loss 4.201 | nll_loss 2.586 | ppl 6.01 | wps 400590 | ups 0.92 | wpb 433519 | bsz 16507.5 | num_updates 21923 | lr 0.00042715 | gnorm 0.248 | clip 0 | loss_scale 2 | train_wall 1682 | gb_free 19.9 | wall 21750 +Start iterating over samples +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +epoch 014: 77 / 1689 loss=4.176, nll_loss=2.558, ppl=5.89, wps=448746, ups=1.05, wpb=428743, bsz=16352.5, num_updates=22000, lr=0.000426401, gnorm=0.242, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21823 +begin validation on "valid" subset +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014 | valid on 'valid' subset | loss 4.279 | nll_loss 2.644 | ppl 6.25 | wps 0 | wpb 42662 | bsz 2032 | num_updates 22000 | best_loss 4.279 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 177 / 1689 loss=4.185, nll_loss=2.567, ppl=5.93, wps=384952, ups=0.88, wpb=435037, bsz=16524.1, num_updates=22100, lr=0.000425436, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=21936 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 277 / 1689 loss=4.174, nll_loss=2.555, ppl=5.88, wps=460519, ups=1.06, wpb=432876, bsz=16453.5, num_updates=22200, lr=0.000424476, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=22030 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 377 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=458391, ups=1.06, wpb=432296, bsz=16695.3, num_updates=22300, lr=0.000423524, gnorm=0.252, clip=0, loss_scale=2, train_wall=93, gb_free=20.6, wall=22124 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 477 / 1689 loss=4.18, nll_loss=2.562, ppl=5.91, wps=460363, ups=1.05, wpb=437593, bsz=16429.8, num_updates=22400, lr=0.000422577, gnorm=0.249, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22219 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 578 / 1689 loss=4.183, nll_loss=2.566, ppl=5.92, wps=455525, ups=1.05, wpb=433951, bsz=16493.3, num_updates=22500, lr=0.000421637, gnorm=0.231, clip=0, loss_scale=2, train_wall=94, gb_free=19, wall=22315 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 678 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=460880, ups=1.07, wpb=432246, bsz=16344.2, num_updates=22600, lr=0.000420703, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=22408 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 778 / 1689 loss=4.186, nll_loss=2.569, ppl=5.93, wps=463055, ups=1.06, wpb=436096, bsz=16427.9, num_updates=22700, lr=0.000419775, gnorm=0.262, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=22503 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 878 / 1689 loss=4.193, nll_loss=2.578, ppl=5.97, wps=464672, ups=1.07, wpb=434759, bsz=16651.8, num_updates=22800, lr=0.000418854, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=20.1, wall=22596 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 978 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=455062, ups=1.06, wpb=429452, bsz=16831, num_updates=22900, lr=0.000417938, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=19.9, wall=22691 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +epoch 014: 1079 / 1689 loss=4.187, nll_loss=2.57, ppl=5.94, wps=457922, ups=1.05, wpb=435804, bsz=16574.6, num_updates=23000, lr=0.000417029, gnorm=0.229, clip=0, loss_scale=2, train_wall=94, gb_free=19.1, wall=22786 +begin validation on "valid" subset +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014 | valid on 'valid' subset | loss 4.267 | nll_loss 2.63 | ppl 6.19 | wps 0 | wpb 42662 | bsz 2032 | num_updates 23000 | best_loss 4.267 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1179 / 1689 loss=4.185, nll_loss=2.569, ppl=5.93, wps=346356, ups=0.8, wpb=431214, bsz=16439.9, num_updates=23100, lr=0.000416125, gnorm=0.24, clip=0, loss_scale=2, train_wall=99, gb_free=19.4, wall=22910 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1279 / 1689 loss=4.193, nll_loss=2.577, ppl=5.97, wps=464426, ups=1.07, wpb=434441, bsz=16452, num_updates=23200, lr=0.000415227, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=20.2, wall=23004 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1380 / 1689 loss=4.208, nll_loss=2.595, ppl=6.04, wps=457791, ups=1.05, wpb=435065, bsz=16301.6, num_updates=23300, lr=0.000414335, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=23099 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1480 / 1689 loss=4.196, nll_loss=2.581, ppl=5.98, wps=462590, ups=1.06, wpb=434770, bsz=16554.6, num_updates=23400, lr=0.000413449, gnorm=0.245, clip=0, loss_scale=1, train_wall=92, gb_free=17.7, wall=23193 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1580 / 1689 loss=4.206, nll_loss=2.593, ppl=6.03, wps=464316, ups=1.07, wpb=433552, bsz=16655.3, num_updates=23500, lr=0.000412568, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=23286 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +epoch 014: 1680 / 1689 loss=4.19, nll_loss=2.575, ppl=5.96, wps=456889, ups=1.06, wpb=432308, bsz=16450.5, num_updates=23600, lr=0.000411693, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=23381 +end of epoch 14 (average epoch stats below) +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +epoch 014 | loss 4.187 | nll_loss 2.571 | ppl 5.94 | wps 445961 | ups 1.03 | wpb 433522 | bsz 16504.2 | num_updates 23609 | lr 0.000411615 | gnorm 0.241 | clip 0 | loss_scale 1 | train_wall 1570 | gb_free 21.7 | wall 23388 +Start iterating over samples +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 91 / 1689 loss=4.148, nll_loss=2.526, ppl=5.76, wps=450550, ups=1.05, wpb=430541, bsz=16180.3, num_updates=23700, lr=0.000410824, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=23476 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 191 / 1689 loss=4.157, nll_loss=2.536, ppl=5.8, wps=463707, ups=1.07, wpb=434453, bsz=16475.4, num_updates=23800, lr=0.00040996, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=23570 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 291 / 1689 loss=4.165, nll_loss=2.546, ppl=5.84, wps=460112, ups=1.06, wpb=434148, bsz=16533.4, num_updates=23900, lr=0.000409101, gnorm=0.244, clip=0, loss_scale=2, train_wall=93, gb_free=17.5, wall=23664 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +epoch 015: 391 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=463646, ups=1.07, wpb=434465, bsz=16846.2, num_updates=24000, lr=0.000408248, gnorm=0.248, clip=0, loss_scale=2, train_wall=92, gb_free=18.2, wall=23758 +begin validation on "valid" subset +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015 | valid on 'valid' subset | loss 4.289 | nll_loss 2.659 | ppl 6.32 | wps 0 | wpb 42662 | bsz 2032 | num_updates 24000 | best_loss 4.267 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 491 / 1689 loss=4.173, nll_loss=2.555, ppl=5.88, wps=406989, ups=0.93, wpb=435414, bsz=16726.7, num_updates=24100, lr=0.0004074, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=23865 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 591 / 1689 loss=4.164, nll_loss=2.544, ppl=5.83, wps=458087, ups=1.06, wpb=431450, bsz=16461.6, num_updates=24200, lr=0.000406558, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=20.3, wall=23959 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 692 / 1689 loss=4.17, nll_loss=2.551, ppl=5.86, wps=451729, ups=1.04, wpb=434396, bsz=16669.2, num_updates=24300, lr=0.00040572, gnorm=0.241, clip=0, loss_scale=1, train_wall=95, gb_free=18.1, wall=24055 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 792 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461952, ups=1.07, wpb=431292, bsz=16241.4, num_updates=24400, lr=0.000404888, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=24149 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 892 / 1689 loss=4.185, nll_loss=2.568, ppl=5.93, wps=464135, ups=1.07, wpb=435089, bsz=16273, num_updates=24500, lr=0.000404061, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=24243 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 992 / 1689 loss=4.186, nll_loss=2.569, ppl=5.94, wps=466729, ups=1.08, wpb=432753, bsz=16211.3, num_updates=24600, lr=0.000403239, gnorm=0.242, clip=0, loss_scale=1, train_wall=91, gb_free=19.6, wall=24335 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1092 / 1689 loss=4.18, nll_loss=2.564, ppl=5.91, wps=459480, ups=1.06, wpb=433278, bsz=16815.3, num_updates=24700, lr=0.000402422, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=24430 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1192 / 1689 loss=4.187, nll_loss=2.571, ppl=5.94, wps=465104, ups=1.07, wpb=434329, bsz=16263.6, num_updates=24800, lr=0.00040161, gnorm=0.234, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=24523 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1292 / 1689 loss=4.18, nll_loss=2.563, ppl=5.91, wps=461167, ups=1.07, wpb=431591, bsz=16883.4, num_updates=24900, lr=0.000400802, gnorm=0.23, clip=0, loss_scale=2, train_wall=92, gb_free=18.8, wall=24617 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +epoch 015: 1392 / 1689 loss=4.177, nll_loss=2.561, ppl=5.9, wps=463916, ups=1.07, wpb=433347, bsz=16589.8, num_updates=25000, lr=0.0004, gnorm=0.243, clip=0, loss_scale=2, train_wall=93, gb_free=20.1, wall=24710 +begin validation on "valid" subset +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015 | valid on 'valid' subset | loss 4.261 | nll_loss 2.626 | ppl 6.17 | wps 0 | wpb 42662 | bsz 2032 | num_updates 25000 | best_loss 4.261 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1492 / 1689 loss=4.182, nll_loss=2.566, ppl=5.92, wps=386720, ups=0.89, wpb=434199, bsz=16731.9, num_updates=25100, lr=0.000399202, gnorm=0.239, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24822 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +epoch 015: 1592 / 1689 loss=4.183, nll_loss=2.567, ppl=5.92, wps=468557, ups=1.08, wpb=435268, bsz=16521.3, num_updates=25200, lr=0.00039841, gnorm=0.238, clip=0, loss_scale=2, train_wall=92, gb_free=19.4, wall=24915 +end of epoch 15 (average epoch stats below) +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +epoch 015 | loss 4.175 | nll_loss 2.557 | ppl 5.88 | wps 452476 | ups 1.04 | wpb 433534 | bsz 16509.1 | num_updates 25296 | lr 0.000397653 | gnorm 0.239 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 19.9 | wall 25005 +Start iterating over samples +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 4 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=454172, ups=1.05, wpb=430595, bsz=16033.7, num_updates=25300, lr=0.000397621, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=25010 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 104 / 1689 loss=4.146, nll_loss=2.524, ppl=5.75, wps=457486, ups=1.06, wpb=431378, bsz=16492.7, num_updates=25400, lr=0.000396838, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=25104 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 204 / 1689 loss=4.159, nll_loss=2.539, ppl=5.81, wps=459429, ups=1.06, wpb=434231, bsz=16686, num_updates=25500, lr=0.000396059, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=25199 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 304 / 1689 loss=4.153, nll_loss=2.532, ppl=5.78, wps=464106, ups=1.07, wpb=435607, bsz=16205.6, num_updates=25600, lr=0.000395285, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=25293 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 404 / 1689 loss=4.158, nll_loss=2.538, ppl=5.81, wps=463243, ups=1.06, wpb=435590, bsz=16143.4, num_updates=25700, lr=0.000394515, gnorm=0.241, clip=0, loss_scale=1, train_wall=93, gb_free=17.8, wall=25387 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 504 / 1689 loss=4.161, nll_loss=2.542, ppl=5.82, wps=459998, ups=1.06, wpb=433332, bsz=16135, num_updates=25800, lr=0.00039375, gnorm=0.234, clip=0, loss_scale=2, train_wall=93, gb_free=20.2, wall=25481 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 604 / 1689 loss=4.167, nll_loss=2.549, ppl=5.85, wps=457222, ups=1.05, wpb=433780, bsz=16479.4, num_updates=25900, lr=0.000392989, gnorm=0.228, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=25576 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +epoch 016: 705 / 1689 loss=4.157, nll_loss=2.537, ppl=5.81, wps=453430, ups=1.04, wpb=434288, bsz=16525.8, num_updates=26000, lr=0.000392232, gnorm=0.241, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=25672 +begin validation on "valid" subset +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016 | valid on 'valid' subset | loss 4.257 | nll_loss 2.618 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 26000 | best_loss 4.257 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 805 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=385497, ups=0.89, wpb=432204, bsz=16582, num_updates=26100, lr=0.00039148, gnorm=0.246, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=25784 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 905 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=459381, ups=1.06, wpb=434854, bsz=16724.9, num_updates=26200, lr=0.000390732, gnorm=0.236, clip=0, loss_scale=1, train_wall=93, gb_free=19.7, wall=25878 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1005 / 1689 loss=4.164, nll_loss=2.546, ppl=5.84, wps=461275, ups=1.06, wpb=433474, bsz=16329.5, num_updates=26300, lr=0.000389989, gnorm=0.24, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=25972 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1105 / 1689 loss=4.171, nll_loss=2.553, ppl=5.87, wps=459449, ups=1.06, wpb=433713, bsz=16550.6, num_updates=26400, lr=0.000389249, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=26067 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1205 / 1689 loss=4.168, nll_loss=2.551, ppl=5.86, wps=456926, ups=1.05, wpb=434008, bsz=17063.8, num_updates=26500, lr=0.000388514, gnorm=0.236, clip=0, loss_scale=2, train_wall=93, gb_free=19.4, wall=26162 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1306 / 1689 loss=4.172, nll_loss=2.554, ppl=5.87, wps=456057, ups=1.05, wpb=433439, bsz=16728.6, num_updates=26600, lr=0.000387783, gnorm=0.231, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=26257 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1406 / 1689 loss=4.158, nll_loss=2.539, ppl=5.81, wps=459894, ups=1.06, wpb=433336, bsz=16519.9, num_updates=26700, lr=0.000387056, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26351 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1506 / 1689 loss=4.163, nll_loss=2.545, ppl=5.83, wps=456133, ups=1.05, wpb=432621, bsz=16482.7, num_updates=26800, lr=0.000386334, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=17.4, wall=26446 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +epoch 016: 1606 / 1689 loss=4.175, nll_loss=2.558, ppl=5.89, wps=456139, ups=1.05, wpb=432629, bsz=16404.8, num_updates=26900, lr=0.000385615, gnorm=0.257, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=26541 +end of epoch 16 (average epoch stats below) +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +epoch 016 | loss 4.163 | nll_loss 2.544 | ppl 5.83 | wps 453512 | ups 1.05 | wpb 433548 | bsz 16506.4 | num_updates 26983 | lr 0.000385021 | gnorm 0.236 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 26618 +Start iterating over samples +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +epoch 017: 17 / 1689 loss=4.178, nll_loss=2.561, ppl=5.9, wps=461326, ups=1.07, wpb=432278, bsz=16594.2, num_updates=27000, lr=0.0003849, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=26634 +begin validation on "valid" subset +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017 | valid on 'valid' subset | loss 4.26 | nll_loss 2.624 | ppl 6.16 | wps 0 | wpb 42662 | bsz 2032 | num_updates 27000 | best_loss 4.257 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 117 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=411441, ups=0.94, wpb=436432, bsz=16644.3, num_updates=27100, lr=0.000384189, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=26740 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 217 / 1689 loss=4.134, nll_loss=2.511, ppl=5.7, wps=459816, ups=1.06, wpb=432640, bsz=16405.2, num_updates=27200, lr=0.000383482, gnorm=0.241, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=26835 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 317 / 1689 loss=4.142, nll_loss=2.52, ppl=5.74, wps=461766, ups=1.07, wpb=432466, bsz=16571.8, num_updates=27300, lr=0.00038278, gnorm=0.233, clip=0, loss_scale=2, train_wall=92, gb_free=18.9, wall=26928 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 417 / 1689 loss=4.154, nll_loss=2.534, ppl=5.79, wps=462470, ups=1.06, wpb=434912, bsz=16578.3, num_updates=27400, lr=0.00038208, gnorm=0.238, clip=0, loss_scale=2, train_wall=93, gb_free=18.7, wall=27022 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 518 / 1689 loss=4.149, nll_loss=2.528, ppl=5.77, wps=459139, ups=1.06, wpb=435115, bsz=16338.6, num_updates=27500, lr=0.000381385, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=20.8, wall=27117 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 618 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=464628, ups=1.07, wpb=432344, bsz=16207.8, num_updates=27600, lr=0.000380693, gnorm=0.246, clip=0, loss_scale=1, train_wall=92, gb_free=21.7, wall=27210 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 718 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=461061, ups=1.07, wpb=432754, bsz=16377.3, num_updates=27700, lr=0.000380006, gnorm=0.233, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=27304 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 818 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=459742, ups=1.06, wpb=433086, bsz=17119.7, num_updates=27800, lr=0.000379322, gnorm=0.228, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=27398 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 918 / 1689 loss=4.169, nll_loss=2.551, ppl=5.86, wps=462349, ups=1.07, wpb=434097, bsz=16707.1, num_updates=27900, lr=0.000378641, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=18.3, wall=27492 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +epoch 017: 1018 / 1689 loss=4.153, nll_loss=2.533, ppl=5.79, wps=460997, ups=1.06, wpb=433777, bsz=16248.5, num_updates=28000, lr=0.000377964, gnorm=0.237, clip=0, loss_scale=2, train_wall=93, gb_free=18.4, wall=27586 +begin validation on "valid" subset +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017 | valid on 'valid' subset | loss 4.251 | nll_loss 2.615 | ppl 6.13 | wps 0 | wpb 42662 | bsz 2032 | num_updates 28000 | best_loss 4.251 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1118 / 1689 loss=4.162, nll_loss=2.544, ppl=5.83, wps=305936, ups=0.7, wpb=435283, bsz=16741.7, num_updates=28100, lr=0.000377291, gnorm=0.24, clip=0, loss_scale=2, train_wall=106, gb_free=19.6, wall=27728 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1218 / 1689 loss=4.156, nll_loss=2.536, ppl=5.8, wps=465371, ups=1.07, wpb=433844, bsz=16269.1, num_updates=28200, lr=0.000376622, gnorm=0.239, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=27822 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1318 / 1689 loss=4.161, nll_loss=2.542, ppl=5.83, wps=462004, ups=1.07, wpb=431936, bsz=16283, num_updates=28300, lr=0.000375956, gnorm=0.23, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=27915 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1419 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=458387, ups=1.06, wpb=433079, bsz=16564.6, num_updates=28400, lr=0.000375293, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=18.9, wall=28010 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1519 / 1689 loss=4.163, nll_loss=2.545, ppl=5.84, wps=459118, ups=1.06, wpb=431802, bsz=16651.8, num_updates=28500, lr=0.000374634, gnorm=0.244, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=28104 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +epoch 017: 1619 / 1689 loss=4.174, nll_loss=2.558, ppl=5.89, wps=465458, ups=1.07, wpb=435664, bsz=16751.8, num_updates=28600, lr=0.000373979, gnorm=0.234, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=28197 +end of epoch 17 (average epoch stats below) +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +epoch 017 | loss 4.153 | nll_loss 2.533 | ppl 5.79 | wps 444838 | ups 1.03 | wpb 433538 | bsz 16507.1 | num_updates 28670 | lr 0.000373522 | gnorm 0.234 | clip 0 | loss_scale 1 | train_wall 1576 | gb_free 19.9 | wall 28262 +Start iterating over samples +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 30 / 1689 loss=4.146, nll_loss=2.525, ppl=5.76, wps=323564, ups=0.75, wpb=430857, bsz=16055.3, num_updates=28700, lr=0.000373327, gnorm=0.232, clip=0, loss_scale=1, train_wall=110, gb_free=20.9, wall=28330 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 130 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=463802, ups=1.07, wpb=434687, bsz=16448.4, num_updates=28800, lr=0.000372678, gnorm=0.229, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=28424 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 231 / 1689 loss=4.138, nll_loss=2.515, ppl=5.72, wps=459798, ups=1.05, wpb=436644, bsz=16421.9, num_updates=28900, lr=0.000372033, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=28519 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +epoch 018: 331 / 1689 loss=4.147, nll_loss=2.526, ppl=5.76, wps=460795, ups=1.06, wpb=433650, bsz=16584.2, num_updates=29000, lr=0.000371391, gnorm=0.243, clip=0, loss_scale=1, train_wall=92, gb_free=18.4, wall=28613 +begin validation on "valid" subset +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.264 | nll_loss 2.632 | ppl 6.2 | wps 0 | wpb 42662 | bsz 2032 | num_updates 29000 | best_loss 4.251 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 431 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=400373, ups=0.92, wpb=433128, bsz=16709.8, num_updates=29100, lr=0.000370752, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=28721 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 531 / 1689 loss=4.128, nll_loss=2.504, ppl=5.67, wps=457087, ups=1.06, wpb=431879, bsz=16439.9, num_updates=29200, lr=0.000370117, gnorm=0.235, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=28816 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 631 / 1689 loss=4.139, nll_loss=2.517, ppl=5.72, wps=458736, ups=1.06, wpb=433575, bsz=16305.6, num_updates=29300, lr=0.000369484, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=21, wall=28910 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 731 / 1689 loss=4.144, nll_loss=2.523, ppl=5.75, wps=462083, ups=1.06, wpb=434216, bsz=16557.8, num_updates=29400, lr=0.000368856, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=19.6, wall=29004 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 832 / 1689 loss=4.151, nll_loss=2.531, ppl=5.78, wps=455080, ups=1.05, wpb=434718, bsz=16990.2, num_updates=29500, lr=0.00036823, gnorm=0.237, clip=0, loss_scale=1, train_wall=94, gb_free=18.3, wall=29100 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 932 / 1689 loss=4.15, nll_loss=2.53, ppl=5.78, wps=463191, ups=1.07, wpb=433854, bsz=16230.4, num_updates=29600, lr=0.000367607, gnorm=0.241, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=29194 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1032 / 1689 loss=4.15, nll_loss=2.529, ppl=5.77, wps=463366, ups=1.07, wpb=434238, bsz=16362.2, num_updates=29700, lr=0.000366988, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=29287 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1132 / 1689 loss=4.155, nll_loss=2.536, ppl=5.8, wps=465342, ups=1.07, wpb=434745, bsz=16456.8, num_updates=29800, lr=0.000366372, gnorm=0.23, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=29381 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1232 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464031, ups=1.07, wpb=432783, bsz=16589.8, num_updates=29900, lr=0.000365758, gnorm=0.234, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=29474 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +epoch 018: 1333 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=458913, ups=1.05, wpb=435845, bsz=16333.2, num_updates=30000, lr=0.000365148, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=29569 +begin validation on "valid" subset +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018 | valid on 'valid' subset | loss 4.256 | nll_loss 2.621 | ppl 6.15 | wps 0 | wpb 42662 | bsz 2032 | num_updates 30000 | best_loss 4.251 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1433 / 1689 loss=4.15, nll_loss=2.531, ppl=5.78, wps=318700, ups=0.73, wpb=434145, bsz=16375.2, num_updates=30100, lr=0.000364541, gnorm=0.25, clip=0, loss_scale=1, train_wall=113, gb_free=19.5, wall=29705 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1533 / 1689 loss=4.138, nll_loss=2.517, ppl=5.73, wps=468589, ups=1.09, wpb=431239, bsz=16390.8, num_updates=30200, lr=0.000363937, gnorm=0.233, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=29797 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +epoch 018: 1633 / 1689 loss=4.142, nll_loss=2.521, ppl=5.74, wps=460496, ups=1.07, wpb=430491, bsz=16886.8, num_updates=30300, lr=0.000363336, gnorm=0.234, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=29891 +end of epoch 18 (average epoch stats below) +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +epoch 018 | loss 4.143 | nll_loss 2.522 | ppl 5.75 | wps 434975 | ups 1 | wpb 433524 | bsz 16509.6 | num_updates 30356 | lr 0.000363001 | gnorm 0.231 | clip 0 | loss_scale 1 | train_wall 1600 | gb_free 21.2 | wall 29942 +Start iterating over samples +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 44 / 1689 loss=4.145, nll_loss=2.525, ppl=5.76, wps=463225, ups=1.07, wpb=431080, bsz=16610.4, num_updates=30400, lr=0.000362738, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=29984 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 145 / 1689 loss=4.103, nll_loss=2.477, ppl=5.57, wps=456504, ups=1.06, wpb=432441, bsz=17005.3, num_updates=30500, lr=0.000362143, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=30078 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 245 / 1689 loss=4.119, nll_loss=2.494, ppl=5.63, wps=464502, ups=1.07, wpb=432117, bsz=16444.1, num_updates=30600, lr=0.000361551, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=30172 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 345 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458932, ups=1.06, wpb=433040, bsz=16658.9, num_updates=30700, lr=0.000360961, gnorm=0.245, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.2, wall=30266 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 445 / 1689 loss=4.126, nll_loss=2.502, ppl=5.66, wps=461206, ups=1.06, wpb=434485, bsz=16242.8, num_updates=30800, lr=0.000360375, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=30360 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 545 / 1689 loss=4.149, nll_loss=2.529, ppl=5.77, wps=464018, ups=1.07, wpb=434279, bsz=16514.1, num_updates=30900, lr=0.000359791, gnorm=0.237, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=30454 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +epoch 019: 645 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=458890, ups=1.06, wpb=433103, bsz=16906.1, num_updates=31000, lr=0.000359211, gnorm=0.226, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=30548 +begin validation on "valid" subset +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019 | valid on 'valid' subset | loss 4.256 | nll_loss 2.619 | ppl 6.14 | wps 0 | wpb 42662 | bsz 2032 | num_updates 31000 | best_loss 4.251 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 745 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=403959, ups=0.93, wpb=432391, bsz=16777.7, num_updates=31100, lr=0.000358633, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=30655 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 845 / 1689 loss=4.126, nll_loss=2.503, ppl=5.67, wps=463532, ups=1.06, wpb=435752, bsz=16395.4, num_updates=31200, lr=0.000358057, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30749 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 945 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=462206, ups=1.07, wpb=433696, bsz=16303.5, num_updates=31300, lr=0.000357485, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=30843 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1045 / 1689 loss=4.134, nll_loss=2.512, ppl=5.71, wps=462842, ups=1.07, wpb=432928, bsz=16353.5, num_updates=31400, lr=0.000356915, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=30936 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1145 / 1689 loss=4.143, nll_loss=2.523, ppl=5.75, wps=465875, ups=1.07, wpb=435258, bsz=16303.9, num_updates=31500, lr=0.000356348, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19, wall=31030 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1245 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=462122, ups=1.06, wpb=434246, bsz=16727.6, num_updates=31600, lr=0.000355784, gnorm=0.241, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=31124 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1346 / 1689 loss=4.149, nll_loss=2.53, ppl=5.77, wps=457916, ups=1.05, wpb=435588, bsz=16693.6, num_updates=31700, lr=0.000355222, gnorm=0.23, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=31219 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1446 / 1689 loss=4.136, nll_loss=2.514, ppl=5.71, wps=461127, ups=1.07, wpb=432016, bsz=16232.2, num_updates=31800, lr=0.000354663, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=31313 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1546 / 1689 loss=4.148, nll_loss=2.528, ppl=5.77, wps=462006, ups=1.06, wpb=434411, bsz=16221.4, num_updates=31900, lr=0.000354107, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=31407 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +epoch 019: 1646 / 1689 loss=4.146, nll_loss=2.527, ppl=5.76, wps=464882, ups=1.07, wpb=433223, bsz=16618.9, num_updates=32000, lr=0.000353553, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=31500 +begin validation on "valid" subset +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +epoch 019 | valid on 'valid' subset | loss 4.241 | nll_loss 2.608 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 32000 | best_loss 4.241 +end of epoch 19 (average epoch stats below) +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +epoch 019 | loss 4.135 | nll_loss 2.513 | ppl 5.71 | wps 452363 | ups 1.04 | wpb 433531 | bsz 16507.7 | num_updates 32043 | lr 0.000353316 | gnorm 0.226 | clip 0 | loss_scale 1 | train_wall 1558 | gb_free 20.4 | wall 31559 +Start iterating over samples +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 57 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=379081, ups=0.88, wpb=429405, bsz=16242.4, num_updates=32100, lr=0.000353002, gnorm=0.235, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=31613 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 157 / 1689 loss=4.115, nll_loss=2.489, ppl=5.62, wps=463081, ups=1.07, wpb=433431, bsz=16208.6, num_updates=32200, lr=0.000352454, gnorm=0.221, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=31707 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 259 / 1689 loss=4.116, nll_loss=2.492, ppl=5.62, wps=454914, ups=1.05, wpb=433488, bsz=16273.3, num_updates=32300, lr=0.000351908, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=95, gb_free=18.9, wall=31802 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 359 / 1689 loss=4.108, nll_loss=2.482, ppl=5.59, wps=461910, ups=1.07, wpb=433356, bsz=16661.7, num_updates=32400, lr=0.000351364, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=31896 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 459 / 1689 loss=4.12, nll_loss=2.496, ppl=5.64, wps=458635, ups=1.06, wpb=433580, bsz=16855.6, num_updates=32500, lr=0.000350823, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=31990 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 559 / 1689 loss=4.125, nll_loss=2.502, ppl=5.66, wps=463708, ups=1.07, wpb=435355, bsz=16728.3, num_updates=32600, lr=0.000350285, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.4, wall=32084 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 659 / 1689 loss=4.127, nll_loss=2.504, ppl=5.67, wps=464081, ups=1.07, wpb=434481, bsz=16422.6, num_updates=32700, lr=0.000349749, gnorm=0.243, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=32178 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 759 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=458950, ups=1.06, wpb=433497, bsz=17027.5, num_updates=32800, lr=0.000349215, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=32272 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 859 / 1689 loss=4.122, nll_loss=2.498, ppl=5.65, wps=460121, ups=1.06, wpb=433320, bsz=16419.5, num_updates=32900, lr=0.000348684, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=32367 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +epoch 020: 959 / 1689 loss=4.118, nll_loss=2.494, ppl=5.63, wps=460019, ups=1.07, wpb=431445, bsz=16508.2, num_updates=33000, lr=0.000348155, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=32460 +begin validation on "valid" subset +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020 | valid on 'valid' subset | loss 4.246 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 33000 | best_loss 4.241 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1059 / 1689 loss=4.135, nll_loss=2.513, ppl=5.71, wps=339668, ups=0.78, wpb=433748, bsz=16155.7, num_updates=33100, lr=0.000347629, gnorm=0.221, clip=0, loss_scale=1, train_wall=108, gb_free=19.7, wall=32588 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1159 / 1689 loss=4.13, nll_loss=2.509, ppl=5.69, wps=462379, ups=1.06, wpb=434841, bsz=16538.2, num_updates=33200, lr=0.000347105, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=32682 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1259 / 1689 loss=4.134, nll_loss=2.513, ppl=5.71, wps=464713, ups=1.07, wpb=435884, bsz=16643, num_updates=33300, lr=0.000346583, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=32776 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1359 / 1689 loss=4.135, nll_loss=2.514, ppl=5.71, wps=463705, ups=1.07, wpb=432659, bsz=16348.6, num_updates=33400, lr=0.000346064, gnorm=0.211, clip=0, loss_scale=2, train_wall=92, gb_free=18.7, wall=32869 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1459 / 1689 loss=4.135, nll_loss=2.515, ppl=5.71, wps=458334, ups=1.06, wpb=432843, bsz=16489.7, num_updates=33500, lr=0.000345547, gnorm=0.227, clip=0, loss_scale=2, train_wall=93, gb_free=19.7, wall=32964 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1559 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=460646, ups=1.07, wpb=431911, bsz=16315.8, num_updates=33600, lr=0.000345033, gnorm=0.221, clip=0, loss_scale=2, train_wall=92, gb_free=20, wall=33057 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +epoch 020: 1660 / 1689 loss=4.141, nll_loss=2.521, ppl=5.74, wps=455336, ups=1.04, wpb=435903, bsz=16577.7, num_updates=33700, lr=0.00034452, gnorm=0.23, clip=0, loss_scale=1, train_wall=94, gb_free=18.5, wall=33153 +end of epoch 20 (average epoch stats below) +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +epoch 020 | loss 4.127 | nll_loss 2.504 | ppl 5.67 | wps 451022 | ups 1.04 | wpb 433509 | bsz 16500.8 | num_updates 33729 | lr 0.000344372 | gnorm 0.225 | clip 0 | loss_scale 1 | train_wall 1581 | gb_free 20 | wall 33179 +Start iterating over samples +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 71 / 1689 loss=4.11, nll_loss=2.484, ppl=5.6, wps=457210, ups=1.07, wpb=429286, bsz=16291.2, num_updates=33800, lr=0.00034401, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=33247 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 171 / 1689 loss=4.102, nll_loss=2.475, ppl=5.56, wps=459986, ups=1.06, wpb=432894, bsz=16506.4, num_updates=33900, lr=0.000343503, gnorm=0.232, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=33341 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +epoch 021: 271 / 1689 loss=4.107, nll_loss=2.481, ppl=5.58, wps=459153, ups=1.06, wpb=432532, bsz=16554.3, num_updates=34000, lr=0.000342997, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=33435 +begin validation on "valid" subset +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021 | valid on 'valid' subset | loss 4.249 | nll_loss 2.614 | ppl 6.12 | wps 0 | wpb 42662 | bsz 2032 | num_updates 34000 | best_loss 4.241 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 371 / 1689 loss=4.111, nll_loss=2.487, ppl=5.6, wps=411900, ups=0.95, wpb=433022, bsz=16786.1, num_updates=34100, lr=0.000342494, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=33541 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 471 / 1689 loss=4.12, nll_loss=2.497, ppl=5.64, wps=463699, ups=1.07, wpb=432162, bsz=16738.3, num_updates=34200, lr=0.000341993, gnorm=0.222, clip=0, loss_scale=2, train_wall=92, gb_free=19.5, wall=33634 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 572 / 1689 loss=4.128, nll_loss=2.505, ppl=5.68, wps=458998, ups=1.05, wpb=437570, bsz=16454.6, num_updates=34300, lr=0.000341494, gnorm=0.22, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=33729 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 672 / 1689 loss=4.119, nll_loss=2.495, ppl=5.64, wps=462280, ups=1.07, wpb=432312, bsz=16457.6, num_updates=34400, lr=0.000340997, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=33823 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 772 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=460533, ups=1.06, wpb=433387, bsz=16617.7, num_updates=34500, lr=0.000340503, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=33917 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 872 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=463166, ups=1.07, wpb=433284, bsz=16319.7, num_updates=34600, lr=0.00034001, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=34010 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 972 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=465239, ups=1.07, wpb=433098, bsz=16375.6, num_updates=34700, lr=0.00033952, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=20.8, wall=34103 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1072 / 1689 loss=4.113, nll_loss=2.489, ppl=5.62, wps=466268, ups=1.07, wpb=436432, bsz=16383.7, num_updates=34800, lr=0.000339032, gnorm=0.22, clip=0, loss_scale=2, train_wall=92, gb_free=18.4, wall=34197 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1173 / 1689 loss=4.132, nll_loss=2.51, ppl=5.7, wps=455921, ups=1.05, wpb=433199, bsz=16787.2, num_updates=34900, lr=0.000338546, gnorm=0.232, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=34292 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +epoch 021: 1273 / 1689 loss=4.13, nll_loss=2.508, ppl=5.69, wps=464930, ups=1.07, wpb=435686, bsz=16508.9, num_updates=35000, lr=0.000338062, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=34386 +begin validation on "valid" subset +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021 | valid on 'valid' subset | loss 4.233 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 35000 | best_loss 4.233 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1373 / 1689 loss=4.127, nll_loss=2.505, ppl=5.68, wps=375758, ups=0.87, wpb=433476, bsz=16184.6, num_updates=35100, lr=0.00033758, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=34501 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1473 / 1689 loss=4.131, nll_loss=2.509, ppl=5.69, wps=458478, ups=1.06, wpb=433124, bsz=16336.1, num_updates=35200, lr=0.0003371, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34596 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1573 / 1689 loss=4.123, nll_loss=2.501, ppl=5.66, wps=464699, ups=1.07, wpb=433947, bsz=16551.1, num_updates=35300, lr=0.000336622, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=34689 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +epoch 021: 1673 / 1689 loss=4.119, nll_loss=2.496, ppl=5.64, wps=462060, ups=1.06, wpb=433905, bsz=16514.5, num_updates=35400, lr=0.000336146, gnorm=0.236, clip=0, loss_scale=2, train_wall=92, gb_free=19.3, wall=34783 +end of epoch 21 (average epoch stats below) +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +epoch 021 | loss 4.119 | nll_loss 2.495 | ppl 5.64 | wps 451795 | ups 1.04 | wpb 433540 | bsz 16506.2 | num_updates 35415 | lr 0.000336075 | gnorm 0.224 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 18.8 | wall 34797 +Start iterating over samples +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 85 / 1689 loss=4.092, nll_loss=2.464, ppl=5.52, wps=445110, ups=1.03, wpb=430826, bsz=16381.6, num_updates=35500, lr=0.000335673, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=34880 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 185 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459593, ups=1.07, wpb=431346, bsz=16346.7, num_updates=35600, lr=0.000335201, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=17.8, wall=34973 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 285 / 1689 loss=4.11, nll_loss=2.485, ppl=5.6, wps=463857, ups=1.07, wpb=433350, bsz=16607.8, num_updates=35700, lr=0.000334731, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=35067 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 385 / 1689 loss=4.109, nll_loss=2.484, ppl=5.59, wps=458638, ups=1.06, wpb=434187, bsz=16586.4, num_updates=35800, lr=0.000334263, gnorm=0.227, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=35162 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 485 / 1689 loss=4.108, nll_loss=2.483, ppl=5.59, wps=458912, ups=1.06, wpb=433987, bsz=16532.9, num_updates=35900, lr=0.000333797, gnorm=0.204, clip=0, loss_scale=1, train_wall=93, gb_free=18.6, wall=35256 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +epoch 022: 586 / 1689 loss=4.109, nll_loss=2.484, ppl=5.6, wps=457050, ups=1.05, wpb=434850, bsz=16725.8, num_updates=36000, lr=0.000333333, gnorm=0.227, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=35351 +begin validation on "valid" subset +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.24 | nll_loss 2.607 | ppl 6.09 | wps 0 | wpb 42662 | bsz 2032 | num_updates 36000 | best_loss 4.233 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 686 / 1689 loss=4.101, nll_loss=2.475, ppl=5.56, wps=414253, ups=0.95, wpb=434726, bsz=16518.4, num_updates=36100, lr=0.000332871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=35456 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 786 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=462307, ups=1.07, wpb=433591, bsz=16176, num_updates=36200, lr=0.000332411, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.9, wall=35550 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 886 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=462630, ups=1.06, wpb=434698, bsz=16747, num_updates=36300, lr=0.000331953, gnorm=0.231, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=35644 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 986 / 1689 loss=4.118, nll_loss=2.495, ppl=5.64, wps=466253, ups=1.07, wpb=434816, bsz=16595.9, num_updates=36400, lr=0.000331497, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=35737 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1086 / 1689 loss=4.114, nll_loss=2.49, ppl=5.62, wps=467342, ups=1.07, wpb=436005, bsz=16555.4, num_updates=36500, lr=0.000331042, gnorm=0.223, clip=0, loss_scale=2, train_wall=93, gb_free=18.8, wall=35831 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1187 / 1689 loss=4.124, nll_loss=2.502, ppl=5.66, wps=461120, ups=1.06, wpb=433412, bsz=16345.7, num_updates=36600, lr=0.00033059, gnorm=0.221, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=35925 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1287 / 1689 loss=4.115, nll_loss=2.491, ppl=5.62, wps=461921, ups=1.07, wpb=432256, bsz=16581.4, num_updates=36700, lr=0.000330139, gnorm=0.221, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=36018 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1387 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=461831, ups=1.06, wpb=434487, bsz=16747.7, num_updates=36800, lr=0.00032969, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20.4, wall=36112 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1487 / 1689 loss=4.12, nll_loss=2.498, ppl=5.65, wps=459307, ups=1.06, wpb=433628, bsz=16277.8, num_updates=36900, lr=0.000329243, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36207 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +epoch 022: 1587 / 1689 loss=4.132, nll_loss=2.511, ppl=5.7, wps=458291, ups=1.06, wpb=433069, bsz=16595.8, num_updates=37000, lr=0.000328798, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=36301 +begin validation on "valid" subset +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022 | valid on 'valid' subset | loss 4.238 | nll_loss 2.61 | ppl 6.1 | wps 0 | wpb 42662 | bsz 2032 | num_updates 37000 | best_loss 4.233 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +epoch 022: 1687 / 1689 loss=4.116, nll_loss=2.493, ppl=5.63, wps=317186, ups=0.73, wpb=432138, bsz=16494, num_updates=37100, lr=0.000328355, gnorm=0.214, clip=0, loss_scale=1, train_wall=120, gb_free=18.6, wall=36437 +end of epoch 22 (average epoch stats below) +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +epoch 022 | loss 4.112 | nll_loss 2.488 | ppl 5.61 | wps 445662 | ups 1.03 | wpb 433551 | bsz 16506.2 | num_updates 37102 | lr 0.000328346 | gnorm 0.221 | clip 0 | loss_scale 1 | train_wall 1591 | gb_free 21 | wall 36438 +Start iterating over samples +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 99 / 1689 loss=4.094, nll_loss=2.467, ppl=5.53, wps=450883, ups=1.05, wpb=430676, bsz=16401.6, num_updates=37200, lr=0.000327913, gnorm=0.233, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=36533 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 199 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=464437, ups=1.07, wpb=434008, bsz=17069.5, num_updates=37300, lr=0.000327473, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=36626 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 299 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=462498, ups=1.06, wpb=435224, bsz=16850.2, num_updates=37400, lr=0.000327035, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=36720 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 399 / 1689 loss=4.104, nll_loss=2.478, ppl=5.57, wps=465940, ups=1.07, wpb=433639, bsz=16597, num_updates=37500, lr=0.000326599, gnorm=0.223, clip=0, loss_scale=1, train_wall=91, gb_free=18.7, wall=36813 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 499 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=458914, ups=1.06, wpb=433888, bsz=16861.4, num_updates=37600, lr=0.000326164, gnorm=0.229, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=36908 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 600 / 1689 loss=4.097, nll_loss=2.47, ppl=5.54, wps=459797, ups=1.06, wpb=434035, bsz=16186.2, num_updates=37700, lr=0.000325731, gnorm=0.213, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37002 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 700 / 1689 loss=4.102, nll_loss=2.476, ppl=5.57, wps=460938, ups=1.07, wpb=432320, bsz=16526.9, num_updates=37800, lr=0.0003253, gnorm=0.238, clip=0, loss_scale=1, train_wall=92, gb_free=20.2, wall=37096 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 800 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=460818, ups=1.07, wpb=432188, bsz=16580.9, num_updates=37900, lr=0.000324871, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=37190 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +epoch 023: 900 / 1689 loss=4.116, nll_loss=2.492, ppl=5.63, wps=463302, ups=1.06, wpb=435101, bsz=16761.3, num_updates=38000, lr=0.000324443, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=37284 +begin validation on "valid" subset +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023 | valid on 'valid' subset | loss 4.235 | nll_loss 2.603 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 38000 | best_loss 4.233 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1000 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410773, ups=0.95, wpb=431003, bsz=16176.1, num_updates=38100, lr=0.000324017, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=37389 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1100 / 1689 loss=4.122, nll_loss=2.499, ppl=5.65, wps=467617, ups=1.07, wpb=435284, bsz=16297, num_updates=38200, lr=0.000323592, gnorm=0.224, clip=0, loss_scale=2, train_wall=91, gb_free=20.3, wall=37482 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1201 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460235, ups=1.06, wpb=433884, bsz=16230.2, num_updates=38300, lr=0.00032317, gnorm=0.219, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=37576 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1301 / 1689 loss=4.115, nll_loss=2.492, ppl=5.62, wps=463973, ups=1.07, wpb=434912, bsz=16428.8, num_updates=38400, lr=0.000322749, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.7, wall=37670 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1401 / 1689 loss=4.108, nll_loss=2.484, ppl=5.59, wps=460976, ups=1.06, wpb=433728, bsz=16586.5, num_updates=38500, lr=0.000322329, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=21.2, wall=37764 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1501 / 1689 loss=4.112, nll_loss=2.489, ppl=5.61, wps=460169, ups=1.06, wpb=433465, bsz=16515.7, num_updates=38600, lr=0.000321911, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=37858 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +epoch 023: 1601 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=460233, ups=1.06, wpb=432668, bsz=16395.2, num_updates=38700, lr=0.000321495, gnorm=0.231, clip=0, loss_scale=1, train_wall=92, gb_free=18.3, wall=37952 +end of epoch 23 (average epoch stats below) +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +epoch 023 | loss 4.106 | nll_loss 2.481 | ppl 5.58 | wps 458203 | ups 1.06 | wpb 433517 | bsz 16510.4 | num_updates 38788 | lr 0.00032113 | gnorm 0.221 | clip 0 | loss_scale 2 | train_wall 1554 | gb_free 22.2 | wall 38034 +Start iterating over samples +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 13 / 1689 loss=4.113, nll_loss=2.489, ppl=5.61, wps=458254, ups=1.07, wpb=430281, bsz=16059.3, num_updates=38800, lr=0.000321081, gnorm=0.212, clip=0, loss_scale=1, train_wall=92, gb_free=20.7, wall=38046 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 113 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=463552, ups=1.07, wpb=433372, bsz=16822.6, num_updates=38900, lr=0.000320668, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=38140 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +epoch 024: 213 / 1689 loss=4.086, nll_loss=2.458, ppl=5.5, wps=460121, ups=1.06, wpb=432202, bsz=16683.8, num_updates=39000, lr=0.000320256, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=38234 +begin validation on "valid" subset +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024 | valid on 'valid' subset | loss 4.236 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 39000 | best_loss 4.233 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 313 / 1689 loss=4.091, nll_loss=2.464, ppl=5.52, wps=408362, ups=0.94, wpb=433792, bsz=16267.3, num_updates=39100, lr=0.000319847, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=38340 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 413 / 1689 loss=4.095, nll_loss=2.468, ppl=5.53, wps=464026, ups=1.07, wpb=435565, bsz=16440.8, num_updates=39200, lr=0.000319438, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=38434 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 513 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=459758, ups=1.06, wpb=433170, bsz=16663.7, num_updates=39300, lr=0.000319032, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=38528 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 615 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=454817, ups=1.04, wpb=435446, bsz=16265, num_updates=39400, lr=0.000318626, gnorm=0.219, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.9, wall=38624 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 715 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=463105, ups=1.07, wpb=434612, bsz=16288.6, num_updates=39500, lr=0.000318223, gnorm=0.233, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=38718 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 815 / 1689 loss=4.099, nll_loss=2.473, ppl=5.55, wps=459706, ups=1.06, wpb=432624, bsz=16313.3, num_updates=39600, lr=0.000317821, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.2, wall=38812 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 915 / 1689 loss=4.109, nll_loss=2.485, ppl=5.6, wps=456886, ups=1.05, wpb=434155, bsz=16573.2, num_updates=39700, lr=0.00031742, gnorm=0.225, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=38907 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1015 / 1689 loss=4.104, nll_loss=2.479, ppl=5.58, wps=462835, ups=1.07, wpb=433411, bsz=16499.4, num_updates=39800, lr=0.000317021, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=39000 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1115 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=453571, ups=1.05, wpb=433929, bsz=17108.2, num_updates=39900, lr=0.000316624, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19, wall=39096 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +epoch 024: 1215 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=454729, ups=1.05, wpb=434876, bsz=16647, num_updates=40000, lr=0.000316228, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=17.7, wall=39192 +begin validation on "valid" subset +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024 | valid on 'valid' subset | loss 4.232 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 40000 | best_loss 4.232 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1315 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=251521, ups=0.58, wpb=431984, bsz=16493.4, num_updates=40100, lr=0.000315833, gnorm=0.219, clip=0, loss_scale=1, train_wall=104, gb_free=18.9, wall=39363 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1415 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=469183, ups=1.08, wpb=433214, bsz=16179.9, num_updates=40200, lr=0.00031544, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=39456 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1515 / 1689 loss=4.116, nll_loss=2.494, ppl=5.63, wps=464152, ups=1.07, wpb=433972, bsz=16471.4, num_updates=40300, lr=0.000315049, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=39549 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +epoch 024: 1616 / 1689 loss=4.114, nll_loss=2.491, ppl=5.62, wps=462423, ups=1.07, wpb=433494, bsz=16577.4, num_updates=40400, lr=0.000314658, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=39643 +end of epoch 24 (average epoch stats below) +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +epoch 024 | loss 4.1 | nll_loss 2.474 | ppl 5.56 | wps 435725 | ups 1.01 | wpb 433550 | bsz 16502.7 | num_updates 40473 | lr 0.000314374 | gnorm 0.22 | clip 0 | loss_scale 0.5 | train_wall 1575 | gb_free 19.8 | wall 39710 +Start iterating over samples +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 27 / 1689 loss=4.101, nll_loss=2.476, ppl=5.56, wps=455930, ups=1.06, wpb=431756, bsz=16213.8, num_updates=40500, lr=0.00031427, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39738 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 127 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=460414, ups=1.07, wpb=431621, bsz=16475.8, num_updates=40600, lr=0.000313882, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.2, wall=39831 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 227 / 1689 loss=4.08, nll_loss=2.451, ppl=5.47, wps=462544, ups=1.06, wpb=435008, bsz=16494.2, num_updates=40700, lr=0.000313497, gnorm=0.232, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=39925 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 327 / 1689 loss=4.092, nll_loss=2.466, ppl=5.52, wps=465125, ups=1.07, wpb=435440, bsz=16651.8, num_updates=40800, lr=0.000313112, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=92, gb_free=19, wall=40019 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 427 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=462588, ups=1.07, wpb=431628, bsz=16671.1, num_updates=40900, lr=0.000312729, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=17.9, wall=40112 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +epoch 025: 527 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=461279, ups=1.07, wpb=432049, bsz=16662.6, num_updates=41000, lr=0.000312348, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=40206 +begin validation on "valid" subset +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.235 | nll_loss 2.604 | ppl 6.08 | wps 0 | wpb 42662 | bsz 2032 | num_updates 41000 | best_loss 4.232 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 627 / 1689 loss=4.098, nll_loss=2.472, ppl=5.55, wps=360427, ups=0.83, wpb=434054, bsz=16200.5, num_updates=41100, lr=0.000311967, gnorm=0.224, clip=0, loss_scale=1, train_wall=100, gb_free=19.2, wall=40326 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 727 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=461516, ups=1.06, wpb=434038, bsz=16858.3, num_updates=41200, lr=0.000311588, gnorm=0.215, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=40421 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 827 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=464647, ups=1.07, wpb=433830, bsz=16363.4, num_updates=41300, lr=0.000311211, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=40514 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 927 / 1689 loss=4.087, nll_loss=2.46, ppl=5.5, wps=463027, ups=1.07, wpb=432684, bsz=16772.3, num_updates=41400, lr=0.000310835, gnorm=0.232, clip=0, loss_scale=2, train_wall=93, gb_free=19.3, wall=40607 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1027 / 1689 loss=4.103, nll_loss=2.478, ppl=5.57, wps=464190, ups=1.07, wpb=434759, bsz=16368.8, num_updates=41500, lr=0.00031046, gnorm=0.218, clip=0, loss_scale=2, train_wall=93, gb_free=18.5, wall=40701 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1127 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=464057, ups=1.07, wpb=432624, bsz=16232, num_updates=41600, lr=0.000310087, gnorm=0.223, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=40794 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1228 / 1689 loss=4.098, nll_loss=2.473, ppl=5.55, wps=457651, ups=1.05, wpb=434848, bsz=16320.3, num_updates=41700, lr=0.000309715, gnorm=0.212, clip=0, loss_scale=1, train_wall=94, gb_free=18.7, wall=40889 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1328 / 1689 loss=4.103, nll_loss=2.479, ppl=5.57, wps=465036, ups=1.07, wpb=435681, bsz=16699.1, num_updates=41800, lr=0.000309344, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=40983 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1428 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=463133, ups=1.07, wpb=434410, bsz=16341.2, num_updates=41900, lr=0.000308975, gnorm=0.218, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=41077 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +epoch 025: 1528 / 1689 loss=4.105, nll_loss=2.481, ppl=5.58, wps=461606, ups=1.06, wpb=433830, bsz=16720.8, num_updates=42000, lr=0.000308607, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=41171 +begin validation on "valid" subset +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025 | valid on 'valid' subset | loss 4.232 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 42000 | best_loss 4.232 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +epoch 025: 1628 / 1689 loss=4.097, nll_loss=2.472, ppl=5.55, wps=238148, ups=0.55, wpb=432275, bsz=16311, num_updates=42100, lr=0.00030824, gnorm=0.208, clip=0, loss_scale=1, train_wall=149, gb_free=19.4, wall=41352 +end of epoch 25 (average epoch stats below) +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +epoch 025 | loss 4.094 | nll_loss 2.467 | ppl 5.53 | wps 431064 | ups 0.99 | wpb 433520 | bsz 16504.6 | num_updates 42161 | lr 0.000308017 | gnorm 0.216 | clip 0 | loss_scale 2 | train_wall 1626 | gb_free 19.3 | wall 41408 +Start iterating over samples +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 40 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=459681, ups=1.07, wpb=431488, bsz=16521.7, num_updates=42200, lr=0.000307875, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.5, wall=41446 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 140 / 1689 loss=4.073, nll_loss=2.443, ppl=5.44, wps=463849, ups=1.07, wpb=434897, bsz=16448.6, num_updates=42300, lr=0.00030751, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=41540 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 240 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=465295, ups=1.07, wpb=434285, bsz=16774.8, num_updates=42400, lr=0.000307148, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=41633 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 340 / 1689 loss=4.085, nll_loss=2.457, ppl=5.49, wps=463612, ups=1.06, wpb=435325, bsz=16605.9, num_updates=42500, lr=0.000306786, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=41727 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 440 / 1689 loss=4.088, nll_loss=2.46, ppl=5.5, wps=464183, ups=1.07, wpb=434092, bsz=16283.8, num_updates=42600, lr=0.000306426, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=41821 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 540 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=459413, ups=1.06, wpb=431496, bsz=16663.7, num_updates=42700, lr=0.000306067, gnorm=0.226, clip=0, loss_scale=2, train_wall=93, gb_free=19.5, wall=41915 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 640 / 1689 loss=4.079, nll_loss=2.451, ppl=5.47, wps=461918, ups=1.07, wpb=433468, bsz=16352.1, num_updates=42800, lr=0.000305709, gnorm=0.216, clip=0, loss_scale=2, train_wall=93, gb_free=19, wall=42008 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 741 / 1689 loss=4.092, nll_loss=2.466, ppl=5.53, wps=456837, ups=1.05, wpb=435287, bsz=16554.8, num_updates=42900, lr=0.000305352, gnorm=0.204, clip=0, loss_scale=1, train_wall=94, gb_free=21, wall=42104 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +epoch 026: 841 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=461283, ups=1.07, wpb=432793, bsz=16478.5, num_updates=43000, lr=0.000304997, gnorm=0.225, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=42198 +begin validation on "valid" subset +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026 | valid on 'valid' subset | loss 4.237 | nll_loss 2.598 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 43000 | best_loss 4.232 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 941 / 1689 loss=4.105, nll_loss=2.48, ppl=5.58, wps=410406, ups=0.94, wpb=435399, bsz=16444.4, num_updates=43100, lr=0.000304643, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=42304 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1041 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=459870, ups=1.07, wpb=431750, bsz=16312.1, num_updates=43200, lr=0.00030429, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=42397 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1141 / 1689 loss=4.1, nll_loss=2.475, ppl=5.56, wps=461695, ups=1.07, wpb=432169, bsz=17002.3, num_updates=43300, lr=0.000303939, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=20.1, wall=42491 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1243 / 1689 loss=4.091, nll_loss=2.465, ppl=5.52, wps=453769, ups=1.05, wpb=432804, bsz=16172.9, num_updates=43400, lr=0.000303588, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=94, gb_free=17.7, wall=42586 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1343 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=462930, ups=1.07, wpb=433051, bsz=16431.4, num_updates=43500, lr=0.000303239, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=42680 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1443 / 1689 loss=4.094, nll_loss=2.469, ppl=5.53, wps=458938, ups=1.05, wpb=435339, bsz=16736.8, num_updates=43600, lr=0.000302891, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=42775 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1543 / 1689 loss=4.096, nll_loss=2.471, ppl=5.54, wps=459452, ups=1.06, wpb=432874, bsz=16407.8, num_updates=43700, lr=0.000302545, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=42869 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +epoch 026: 1643 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=460820, ups=1.06, wpb=433322, bsz=16424.5, num_updates=43800, lr=0.000302199, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=21.3, wall=42963 +end of epoch 26 (average epoch stats below) +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +epoch 026 | loss 4.088 | nll_loss 2.461 | ppl 5.51 | wps 457132 | ups 1.05 | wpb 433543 | bsz 16499.1 | num_updates 43846 | lr 0.00030204 | gnorm 0.217 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 19.2 | wall 43006 +Start iterating over samples +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 54 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=455806, ups=1.06, wpb=429906, bsz=16068.7, num_updates=43900, lr=0.000301855, gnorm=0.226, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=43057 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +epoch 027: 154 / 1689 loss=4.075, nll_loss=2.446, ppl=5.45, wps=461685, ups=1.06, wpb=434034, bsz=16667, num_updates=44000, lr=0.000301511, gnorm=0.212, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=43151 +begin validation on "valid" subset +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027 | valid on 'valid' subset | loss 4.23 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 44000 | best_loss 4.23 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 254 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=380894, ups=0.88, wpb=433802, bsz=16087.9, num_updates=44100, lr=0.000301169, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=20.1, wall=43265 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 354 / 1689 loss=4.068, nll_loss=2.438, ppl=5.42, wps=464391, ups=1.07, wpb=434518, bsz=16420.6, num_updates=44200, lr=0.000300828, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.3, wall=43359 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 454 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=467322, ups=1.08, wpb=433255, bsz=16445.4, num_updates=44300, lr=0.000300489, gnorm=0.223, clip=0, loss_scale=1, train_wall=92, gb_free=19.9, wall=43452 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 554 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=462289, ups=1.07, wpb=432636, bsz=16436.1, num_updates=44400, lr=0.00030015, gnorm=0.206, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=43545 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 655 / 1689 loss=4.094, nll_loss=2.468, ppl=5.53, wps=457625, ups=1.05, wpb=435270, bsz=16401, num_updates=44500, lr=0.000299813, gnorm=0.222, clip=0, loss_scale=1, train_wall=94, gb_free=19.5, wall=43640 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 756 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=459049, ups=1.06, wpb=434750, bsz=16543.3, num_updates=44600, lr=0.000299476, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.6, wall=43735 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 856 / 1689 loss=4.093, nll_loss=2.467, ppl=5.53, wps=467636, ups=1.07, wpb=435603, bsz=16785.6, num_updates=44700, lr=0.000299141, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=43828 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 956 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=463689, ups=1.08, wpb=431315, bsz=16270.9, num_updates=44800, lr=0.000298807, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=91, gb_free=19, wall=43921 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1056 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=460574, ups=1.06, wpb=433527, bsz=16867.7, num_updates=44900, lr=0.000298474, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=44015 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +epoch 027: 1156 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=460396, ups=1.06, wpb=434224, bsz=16964.4, num_updates=45000, lr=0.000298142, gnorm=0.224, clip=0, loss_scale=0.5, train_wall=93, gb_free=21.2, wall=44110 +begin validation on "valid" subset +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027 | valid on 'valid' subset | loss 4.225 | nll_loss 2.59 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 45000 | best_loss 4.225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1256 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=380189, ups=0.87, wpb=436546, bsz=16410.8, num_updates=45100, lr=0.000297812, gnorm=0.202, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=44225 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1356 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=463547, ups=1.07, wpb=433622, bsz=16501.8, num_updates=45200, lr=0.000297482, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=20.4, wall=44318 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1456 / 1689 loss=4.076, nll_loss=2.449, ppl=5.46, wps=458622, ups=1.06, wpb=431730, bsz=16383, num_updates=45300, lr=0.000297154, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=44412 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1556 / 1689 loss=4.085, nll_loss=2.459, ppl=5.5, wps=461243, ups=1.07, wpb=431941, bsz=16499.1, num_updates=45400, lr=0.000296826, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=44506 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +epoch 027: 1656 / 1689 loss=4.101, nll_loss=2.477, ppl=5.57, wps=463890, ups=1.07, wpb=433043, bsz=16606.6, num_updates=45500, lr=0.0002965, gnorm=0.22, clip=0, loss_scale=1, train_wall=92, gb_free=18.5, wall=44599 +end of epoch 27 (average epoch stats below) +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +epoch 027 | loss 4.083 | nll_loss 2.456 | ppl 5.49 | wps 450447 | ups 1.04 | wpb 433522 | bsz 16505.6 | num_updates 45533 | lr 0.000296392 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1561 | gb_free 20.6 | wall 44630 +Start iterating over samples +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 67 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=454836, ups=1.06, wpb=430716, bsz=16662.5, num_updates=45600, lr=0.000296174, gnorm=0.222, clip=0, loss_scale=1, train_wall=93, gb_free=19.6, wall=44694 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 168 / 1689 loss=4.057, nll_loss=2.426, ppl=5.37, wps=454900, ups=1.05, wpb=434076, bsz=16408.6, num_updates=45700, lr=0.00029585, gnorm=0.214, clip=0, loss_scale=1, train_wall=94, gb_free=19.2, wall=44789 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 268 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462703, ups=1.07, wpb=432960, bsz=16396.2, num_updates=45800, lr=0.000295527, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=44883 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 368 / 1689 loss=4.083, nll_loss=2.455, ppl=5.48, wps=464218, ups=1.07, wpb=434138, bsz=16686.9, num_updates=45900, lr=0.000295205, gnorm=0.211, clip=0, loss_scale=1, train_wall=91, gb_free=18.6, wall=44976 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +epoch 028: 468 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=461508, ups=1.06, wpb=433403, bsz=16608.9, num_updates=46000, lr=0.000294884, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=45070 +begin validation on "valid" subset +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028 | valid on 'valid' subset | loss 4.226 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 46000 | best_loss 4.225 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 569 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=321887, ups=0.74, wpb=432782, bsz=16887.8, num_updates=46100, lr=0.000294564, gnorm=0.21, clip=0, loss_scale=0.5, train_wall=119, gb_free=19.4, wall=45205 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 669 / 1689 loss=4.074, nll_loss=2.445, ppl=5.45, wps=464184, ups=1.07, wpb=435177, bsz=16577.9, num_updates=46200, lr=0.000294245, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=45299 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 769 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=464118, ups=1.07, wpb=432545, bsz=16448.5, num_updates=46300, lr=0.000293927, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.2, wall=45392 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 869 / 1689 loss=4.07, nll_loss=2.441, ppl=5.43, wps=460152, ups=1.07, wpb=431811, bsz=16432.9, num_updates=46400, lr=0.00029361, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=45486 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 969 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464034, ups=1.07, wpb=434578, bsz=16394.4, num_updates=46500, lr=0.000293294, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.6, wall=45579 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1069 / 1689 loss=4.092, nll_loss=2.465, ppl=5.52, wps=464960, ups=1.07, wpb=432692, bsz=16114.7, num_updates=46600, lr=0.000292979, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=45672 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1169 / 1689 loss=4.073, nll_loss=2.445, ppl=5.44, wps=462979, ups=1.07, wpb=434314, bsz=16336.5, num_updates=46700, lr=0.000292666, gnorm=0.207, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=45766 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1269 / 1689 loss=4.081, nll_loss=2.454, ppl=5.48, wps=457405, ups=1.05, wpb=434496, bsz=16355.4, num_updates=46800, lr=0.000292353, gnorm=0.215, clip=0, loss_scale=1, train_wall=94, gb_free=18.8, wall=45861 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1369 / 1689 loss=4.099, nll_loss=2.474, ppl=5.56, wps=458415, ups=1.06, wpb=433414, bsz=16633.5, num_updates=46900, lr=0.000292041, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=20.6, wall=45956 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +epoch 028: 1469 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459887, ups=1.06, wpb=431880, bsz=16707.8, num_updates=47000, lr=0.00029173, gnorm=0.232, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46050 +begin validation on "valid" subset +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028 | valid on 'valid' subset | loss 4.224 | nll_loss 2.593 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 47000 | best_loss 4.224 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1569 / 1689 loss=4.088, nll_loss=2.462, ppl=5.51, wps=292799, ups=0.67, wpb=433880, bsz=16444.9, num_updates=47100, lr=0.00029142, gnorm=0.209, clip=0, loss_scale=2, train_wall=120, gb_free=18.3, wall=46198 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +epoch 028: 1670 / 1689 loss=4.089, nll_loss=2.463, ppl=5.51, wps=460538, ups=1.06, wpb=436489, bsz=16646.4, num_updates=47200, lr=0.000291111, gnorm=0.225, clip=0, loss_scale=1, train_wall=93, gb_free=18.7, wall=46293 +end of epoch 28 (average epoch stats below) +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +epoch 028 | loss 4.078 | nll_loss 2.45 | ppl 5.47 | wps 435024 | ups 1 | wpb 433521 | bsz 16502.6 | num_updates 47219 | lr 0.000291053 | gnorm 0.216 | clip 0 | loss_scale 1 | train_wall 1616 | gb_free 21.1 | wall 46310 +Start iterating over samples +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 81 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=457898, ups=1.06, wpb=430047, bsz=16366, num_updates=47300, lr=0.000290803, gnorm=0.207, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=46386 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 181 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=460621, ups=1.06, wpb=434321, bsz=16959.5, num_updates=47400, lr=0.000290496, gnorm=0.22, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=46481 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 281 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=458397, ups=1.06, wpb=433442, bsz=16320.8, num_updates=47500, lr=0.000290191, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=46575 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 381 / 1689 loss=4.064, nll_loss=2.433, ppl=5.4, wps=463238, ups=1.07, wpb=430968, bsz=16059.8, num_updates=47600, lr=0.000289886, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=46668 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 481 / 1689 loss=4.065, nll_loss=2.435, ppl=5.41, wps=462006, ups=1.06, wpb=434162, bsz=16793.4, num_updates=47700, lr=0.000289581, gnorm=0.217, clip=0, loss_scale=2, train_wall=93, gb_free=18.9, wall=46762 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 582 / 1689 loss=4.066, nll_loss=2.437, ppl=5.42, wps=457653, ups=1.06, wpb=433248, bsz=16864.2, num_updates=47800, lr=0.000289278, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=19, wall=46857 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 682 / 1689 loss=4.083, nll_loss=2.456, ppl=5.49, wps=463610, ups=1.07, wpb=432860, bsz=16564.1, num_updates=47900, lr=0.000288976, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=46950 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +epoch 029: 782 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=463348, ups=1.07, wpb=433894, bsz=16387.8, num_updates=48000, lr=0.000288675, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=47044 +begin validation on "valid" subset +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029 | valid on 'valid' subset | loss 4.235 | nll_loss 2.6 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 48000 | best_loss 4.224 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 882 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=346251, ups=0.8, wpb=433530, bsz=16551.6, num_updates=48100, lr=0.000288375, gnorm=0.208, clip=0, loss_scale=1, train_wall=96, gb_free=19.1, wall=47169 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 982 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=462635, ups=1.07, wpb=431284, bsz=16572.7, num_updates=48200, lr=0.000288076, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=19.1, wall=47262 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1082 / 1689 loss=4.077, nll_loss=2.449, ppl=5.46, wps=466768, ups=1.08, wpb=434095, bsz=16375, num_updates=48300, lr=0.000287777, gnorm=0.206, clip=0, loss_scale=2, train_wall=92, gb_free=19.8, wall=47355 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1183 / 1689 loss=4.086, nll_loss=2.459, ppl=5.5, wps=460164, ups=1.06, wpb=434698, bsz=16696.3, num_updates=48400, lr=0.00028748, gnorm=0.208, clip=0, loss_scale=1, train_wall=94, gb_free=21.1, wall=47450 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1283 / 1689 loss=4.084, nll_loss=2.458, ppl=5.49, wps=464066, ups=1.07, wpb=435179, bsz=16561, num_updates=48500, lr=0.000287183, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=47544 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1383 / 1689 loss=4.081, nll_loss=2.455, ppl=5.48, wps=463774, ups=1.07, wpb=433328, bsz=16580.2, num_updates=48600, lr=0.000286888, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=47637 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1483 / 1689 loss=4.084, nll_loss=2.457, ppl=5.49, wps=466994, ups=1.07, wpb=436018, bsz=16361.6, num_updates=48700, lr=0.000286593, gnorm=0.224, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=47730 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1583 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=463340, ups=1.07, wpb=433219, bsz=16229.7, num_updates=48800, lr=0.000286299, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=19.2, wall=47824 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +epoch 029: 1684 / 1689 loss=4.08, nll_loss=2.453, ppl=5.48, wps=460853, ups=1.05, wpb=436966, bsz=16283.8, num_updates=48900, lr=0.000286006, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=47919 +end of epoch 29 (average epoch stats below) +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +epoch 029 | loss 4.074 | nll_loss 2.445 | ppl 5.45 | wps 453180 | ups 1.05 | wpb 433541 | bsz 16502.2 | num_updates 48905 | lr 0.000285992 | gnorm 0.213 | clip 0 | loss_scale 1 | train_wall 1562 | gb_free 22.6 | wall 47923 +Start iterating over samples +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +epoch 030: 95 / 1689 loss=4.061, nll_loss=2.43, ppl=5.39, wps=459096, ups=1.07, wpb=430082, bsz=16606.8, num_updates=49000, lr=0.000285714, gnorm=0.219, clip=0, loss_scale=1, train_wall=91, gb_free=18.8, wall=48013 +begin validation on "valid" subset +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.228 | nll_loss 2.591 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 49000 | best_loss 4.224 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 195 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=407330, ups=0.94, wpb=433638, bsz=16156.6, num_updates=49100, lr=0.000285423, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=48119 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 295 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=458141, ups=1.06, wpb=434029, bsz=16278.2, num_updates=49200, lr=0.000285133, gnorm=0.203, clip=0, loss_scale=1, train_wall=93, gb_free=18.9, wall=48214 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 395 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=458513, ups=1.06, wpb=433592, bsz=16339.5, num_updates=49300, lr=0.000284844, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=48308 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 496 / 1689 loss=4.063, nll_loss=2.433, ppl=5.4, wps=453878, ups=1.05, wpb=432563, bsz=16849.3, num_updates=49400, lr=0.000284555, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=94, gb_free=21.3, wall=48404 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 596 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=462441, ups=1.07, wpb=432582, bsz=16564, num_updates=49500, lr=0.000284268, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.9, wall=48497 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 696 / 1689 loss=4.06, nll_loss=2.429, ppl=5.39, wps=460511, ups=1.06, wpb=433328, bsz=16309.4, num_updates=49600, lr=0.000283981, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.6, wall=48591 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 796 / 1689 loss=4.068, nll_loss=2.439, ppl=5.42, wps=460353, ups=1.06, wpb=435128, bsz=16379.6, num_updates=49700, lr=0.000283695, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=48686 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 896 / 1689 loss=4.082, nll_loss=2.454, ppl=5.48, wps=461968, ups=1.06, wpb=435196, bsz=16368.7, num_updates=49800, lr=0.00028341, gnorm=0.226, clip=0, loss_scale=0.5, train_wall=93, gb_free=20, wall=48780 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 996 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=462876, ups=1.07, wpb=432956, bsz=16577.2, num_updates=49900, lr=0.000283126, gnorm=0.206, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=48873 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +epoch 030: 1096 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461805, ups=1.07, wpb=431467, bsz=16677.8, num_updates=50000, lr=0.000282843, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=48967 +begin validation on "valid" subset +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030 | valid on 'valid' subset | loss 4.226 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 50000 | best_loss 4.224 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1196 / 1689 loss=4.067, nll_loss=2.438, ppl=5.42, wps=406916, ups=0.94, wpb=432360, bsz=16912.6, num_updates=50100, lr=0.00028256, gnorm=0.228, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=49073 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1296 / 1689 loss=4.088, nll_loss=2.461, ppl=5.51, wps=464889, ups=1.07, wpb=435554, bsz=16304.8, num_updates=50200, lr=0.000282279, gnorm=0.222, clip=0, loss_scale=1, train_wall=92, gb_free=19.8, wall=49167 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1396 / 1689 loss=4.069, nll_loss=2.44, ppl=5.43, wps=458692, ups=1.06, wpb=432935, bsz=16610.3, num_updates=50300, lr=0.000281998, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=49261 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1497 / 1689 loss=4.077, nll_loss=2.45, ppl=5.46, wps=455457, ups=1.05, wpb=433300, bsz=16701.4, num_updates=50400, lr=0.000281718, gnorm=0.227, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=49356 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +epoch 030: 1597 / 1689 loss=4.08, nll_loss=2.454, ppl=5.48, wps=464307, ups=1.06, wpb=437550, bsz=16510.7, num_updates=50500, lr=0.000281439, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=92, gb_free=21, wall=49451 +end of epoch 30 (average epoch stats below) +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +epoch 030 | loss 4.069 | nll_loss 2.44 | ppl 5.43 | wps 453247 | ups 1.05 | wpb 433542 | bsz 16505.5 | num_updates 50592 | lr 0.000281183 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1560 | gb_free 20.7 | wall 49536 +Start iterating over samples +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 8 / 1689 loss=4.075, nll_loss=2.447, ppl=5.45, wps=458599, ups=1.07, wpb=430327, bsz=16437.4, num_updates=50600, lr=0.000281161, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.9, wall=49544 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 108 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=459694, ups=1.07, wpb=431555, bsz=16452.6, num_updates=50700, lr=0.000280883, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.1, wall=49638 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 208 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=464290, ups=1.07, wpb=434067, bsz=16653.1, num_updates=50800, lr=0.000280607, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=49732 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 308 / 1689 loss=4.055, nll_loss=2.424, ppl=5.37, wps=463706, ups=1.07, wpb=434638, bsz=16319.8, num_updates=50900, lr=0.000280331, gnorm=0.219, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=49826 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +epoch 031: 408 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=460068, ups=1.06, wpb=432577, bsz=16378.5, num_updates=51000, lr=0.000280056, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.7, wall=49920 +begin validation on "valid" subset +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031 | valid on 'valid' subset | loss 4.233 | nll_loss 2.596 | ppl 6.05 | wps 0 | wpb 42662 | bsz 2032 | num_updates 51000 | best_loss 4.224 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 508 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=409578, ups=0.94, wpb=435001, bsz=16788.8, num_updates=51100, lr=0.000279782, gnorm=0.223, clip=0, loss_scale=1, train_wall=93, gb_free=18.4, wall=50026 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 608 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461877, ups=1.06, wpb=433910, bsz=16527.1, num_updates=51200, lr=0.000279508, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=50120 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 708 / 1689 loss=4.071, nll_loss=2.442, ppl=5.43, wps=462319, ups=1.06, wpb=435202, bsz=16577.9, num_updates=51300, lr=0.000279236, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=50214 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 809 / 1689 loss=4.076, nll_loss=2.448, ppl=5.46, wps=460205, ups=1.06, wpb=435830, bsz=16220, num_updates=51400, lr=0.000278964, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=50309 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 909 / 1689 loss=4.073, nll_loss=2.444, ppl=5.44, wps=461968, ups=1.06, wpb=435356, bsz=16359.5, num_updates=51500, lr=0.000278693, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.8, wall=50403 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1009 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=460412, ups=1.07, wpb=431060, bsz=16825.2, num_updates=51600, lr=0.000278423, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=50496 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1109 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=464106, ups=1.07, wpb=432398, bsz=15988.1, num_updates=51700, lr=0.000278154, gnorm=0.23, clip=0, loss_scale=0.5, train_wall=91, gb_free=19.5, wall=50590 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1209 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=458721, ups=1.06, wpb=433520, bsz=16700.9, num_updates=51800, lr=0.000277885, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.2, wall=50684 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1309 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463760, ups=1.07, wpb=433012, bsz=16257.8, num_updates=51900, lr=0.000277617, gnorm=0.202, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=50778 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +epoch 031: 1410 / 1689 loss=4.072, nll_loss=2.445, ppl=5.44, wps=453921, ups=1.05, wpb=434354, bsz=16865.7, num_updates=52000, lr=0.00027735, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=94, gb_free=20.4, wall=50873 +begin validation on "valid" subset +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031 | valid on 'valid' subset | loss 4.22 | nll_loss 2.589 | ppl 6.02 | wps 0 | wpb 42662 | bsz 2032 | num_updates 52000 | best_loss 4.22 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1510 / 1689 loss=4.072, nll_loss=2.444, ppl=5.44, wps=379236, ups=0.87, wpb=433889, bsz=16920, num_updates=52100, lr=0.000277084, gnorm=0.206, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=50988 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +epoch 031: 1610 / 1689 loss=4.082, nll_loss=2.455, ppl=5.48, wps=466547, ups=1.07, wpb=434258, bsz=16317.4, num_updates=52200, lr=0.000276818, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.8, wall=51081 +end of epoch 31 (average epoch stats below) +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +epoch 031 | loss 4.065 | nll_loss 2.436 | ppl 5.41 | wps 452288 | ups 1.04 | wpb 433521 | bsz 16505.9 | num_updates 52279 | lr 0.000276609 | gnorm 0.214 | clip 0 | loss_scale 0.5 | train_wall 1556 | gb_free 20.3 | wall 51153 +Start iterating over samples +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 21 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=454300, ups=1.06, wpb=428884, bsz=16530.5, num_updates=52300, lr=0.000276553, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=91, gb_free=18.6, wall=51175 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 121 / 1689 loss=4.051, nll_loss=2.419, ppl=5.35, wps=463121, ups=1.07, wpb=433056, bsz=16790.9, num_updates=52400, lr=0.000276289, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=51269 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 221 / 1689 loss=4.043, nll_loss=2.41, ppl=5.32, wps=463794, ups=1.07, wpb=433147, bsz=16419.7, num_updates=52500, lr=0.000276026, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=51362 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 321 / 1689 loss=4.059, nll_loss=2.429, ppl=5.38, wps=466130, ups=1.08, wpb=432538, bsz=16593.8, num_updates=52600, lr=0.000275764, gnorm=0.209, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=51455 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 421 / 1689 loss=4.067, nll_loss=2.437, ppl=5.42, wps=466327, ups=1.07, wpb=435136, bsz=16619.6, num_updates=52700, lr=0.000275502, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=51548 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 522 / 1689 loss=4.054, nll_loss=2.423, ppl=5.36, wps=455919, ups=1.05, wpb=433925, bsz=16704.2, num_updates=52800, lr=0.000275241, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=51643 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 622 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=460499, ups=1.07, wpb=432190, bsz=16438.8, num_updates=52900, lr=0.000274981, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.4, wall=51737 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +epoch 032: 722 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=459933, ups=1.06, wpb=433590, bsz=16613.8, num_updates=53000, lr=0.000274721, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51831 +begin validation on "valid" subset +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032 | valid on 'valid' subset | loss 4.213 | nll_loss 2.579 | ppl 5.98 | wps 0 | wpb 42662 | bsz 2032 | num_updates 53000 | best_loss 4.213 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 822 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=387119, ups=0.89, wpb=435847, bsz=16008.7, num_updates=53100, lr=0.000274462, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.9, wall=51944 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 922 / 1689 loss=4.067, nll_loss=2.439, ppl=5.42, wps=465162, ups=1.07, wpb=433653, bsz=16535.8, num_updates=53200, lr=0.000274204, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.6, wall=52037 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1022 / 1689 loss=4.059, nll_loss=2.428, ppl=5.38, wps=464006, ups=1.07, wpb=432714, bsz=16053.7, num_updates=53300, lr=0.000273947, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52131 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1122 / 1689 loss=4.071, nll_loss=2.443, ppl=5.44, wps=459532, ups=1.06, wpb=435179, bsz=16525.2, num_updates=53400, lr=0.00027369, gnorm=0.218, clip=0, loss_scale=1, train_wall=93, gb_free=19.2, wall=52225 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1222 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459292, ups=1.06, wpb=433732, bsz=16268.4, num_updates=53500, lr=0.000273434, gnorm=0.214, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=52320 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1322 / 1689 loss=4.059, nll_loss=2.429, ppl=5.39, wps=461660, ups=1.07, wpb=432888, bsz=16172.6, num_updates=53600, lr=0.000273179, gnorm=0.213, clip=0, loss_scale=1, train_wall=92, gb_free=20.6, wall=52413 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1422 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=460480, ups=1.06, wpb=433376, bsz=16836.9, num_updates=53700, lr=0.000272925, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=52508 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1522 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=460326, ups=1.06, wpb=435315, bsz=16626.3, num_updates=53800, lr=0.000272671, gnorm=0.209, clip=0, loss_scale=2, train_wall=93, gb_free=19.1, wall=52602 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +epoch 032: 1623 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=453123, ups=1.05, wpb=432607, bsz=16539.8, num_updates=53900, lr=0.000272418, gnorm=0.202, clip=0, loss_scale=1, train_wall=94, gb_free=19.1, wall=52698 +end of epoch 32 (average epoch stats below) +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +epoch 032 | loss 4.061 | nll_loss 2.431 | ppl 5.39 | wps 455471 | ups 1.05 | wpb 433520 | bsz 16504 | num_updates 53966 | lr 0.000272251 | gnorm 0.211 | clip 0 | loss_scale 1 | train_wall 1563 | gb_free 22.2 | wall 52759 +Start iterating over samples +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +epoch 033: 34 / 1689 loss=4.074, nll_loss=2.446, ppl=5.45, wps=457539, ups=1.06, wpb=431479, bsz=16452.4, num_updates=54000, lr=0.000272166, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=52792 +begin validation on "valid" subset +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.223 | nll_loss 2.585 | ppl 6 | wps 0 | wpb 42662 | bsz 2032 | num_updates 54000 | best_loss 4.213 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 134 / 1689 loss=4.037, nll_loss=2.403, ppl=5.29, wps=406976, ups=0.94, wpb=432683, bsz=16514.5, num_updates=54100, lr=0.000271914, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.3, wall=52898 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 235 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=454832, ups=1.05, wpb=432951, bsz=16840.8, num_updates=54200, lr=0.000271663, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.1, wall=52993 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 335 / 1689 loss=4.036, nll_loss=2.402, ppl=5.29, wps=458161, ups=1.06, wpb=434118, bsz=16078.7, num_updates=54300, lr=0.000271413, gnorm=0.221, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=53088 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 435 / 1689 loss=4.048, nll_loss=2.416, ppl=5.34, wps=458494, ups=1.06, wpb=432593, bsz=16471.4, num_updates=54400, lr=0.000271163, gnorm=0.194, clip=0, loss_scale=0.5, train_wall=93, gb_free=20.5, wall=53183 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 535 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463029, ups=1.06, wpb=434854, bsz=16533.1, num_updates=54500, lr=0.000270914, gnorm=0.199, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.3, wall=53276 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 635 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=459274, ups=1.06, wpb=433496, bsz=16819.6, num_updates=54600, lr=0.000270666, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=53371 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 735 / 1689 loss=4.043, nll_loss=2.411, ppl=5.32, wps=458134, ups=1.06, wpb=433005, bsz=16415.8, num_updates=54700, lr=0.000270418, gnorm=0.217, clip=0, loss_scale=1, train_wall=93, gb_free=18.5, wall=53465 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 835 / 1689 loss=4.061, nll_loss=2.431, ppl=5.39, wps=462291, ups=1.06, wpb=434246, bsz=16773, num_updates=54800, lr=0.000270172, gnorm=0.216, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=53559 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 935 / 1689 loss=4.05, nll_loss=2.419, ppl=5.35, wps=465263, ups=1.07, wpb=433296, bsz=15991.7, num_updates=54900, lr=0.000269925, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=19.6, wall=53652 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +epoch 033: 1036 / 1689 loss=4.062, nll_loss=2.432, ppl=5.4, wps=461030, ups=1.06, wpb=433091, bsz=16480.4, num_updates=55000, lr=0.00026968, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.5, wall=53746 +begin validation on "valid" subset +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033 | valid on 'valid' subset | loss 4.229 | nll_loss 2.599 | ppl 6.06 | wps 0 | wpb 42662 | bsz 2032 | num_updates 55000 | best_loss 4.213 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1136 / 1689 loss=4.065, nll_loss=2.436, ppl=5.41, wps=342628, ups=0.79, wpb=434198, bsz=16522.6, num_updates=55100, lr=0.000269435, gnorm=0.212, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.1, wall=53873 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1236 / 1689 loss=4.063, nll_loss=2.435, ppl=5.41, wps=463167, ups=1.07, wpb=433642, bsz=17214.2, num_updates=55200, lr=0.000269191, gnorm=0.231, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.5, wall=53967 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1336 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=463836, ups=1.07, wpb=432888, bsz=16417.4, num_updates=55300, lr=0.000268947, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.7, wall=54060 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1436 / 1689 loss=4.069, nll_loss=2.441, ppl=5.43, wps=459934, ups=1.06, wpb=433723, bsz=16798.7, num_updates=55400, lr=0.000268705, gnorm=0.208, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.2, wall=54154 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1536 / 1689 loss=4.061, nll_loss=2.432, ppl=5.4, wps=462576, ups=1.07, wpb=434035, bsz=16562.9, num_updates=55500, lr=0.000268462, gnorm=0.211, clip=0, loss_scale=1, train_wall=93, gb_free=20, wall=54248 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +epoch 033: 1637 / 1689 loss=4.07, nll_loss=2.442, ppl=5.43, wps=461204, ups=1.06, wpb=435876, bsz=16138.6, num_updates=55600, lr=0.000268221, gnorm=0.197, clip=0, loss_scale=0.5, train_wall=94, gb_free=18.8, wall=54343 +end of epoch 33 (average epoch stats below) +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +epoch 033 | loss 4.057 | nll_loss 2.427 | ppl 5.38 | wps 447861 | ups 1.03 | wpb 433533 | bsz 16503 | num_updates 55652 | lr 0.000268096 | gnorm 0.21 | clip 0 | loss_scale 0.5 | train_wall 1567 | gb_free 21.6 | wall 54391 +Start iterating over samples +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 48 / 1689 loss=4.051, nll_loss=2.42, ppl=5.35, wps=458495, ups=1.06, wpb=431704, bsz=16171.7, num_updates=55700, lr=0.00026798, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.2, wall=54437 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 148 / 1689 loss=4.044, nll_loss=2.411, ppl=5.32, wps=460000, ups=1.06, wpb=434055, bsz=16965.8, num_updates=55800, lr=0.00026774, gnorm=0.215, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.8, wall=54531 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 248 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=463720, ups=1.07, wpb=432602, bsz=16144.7, num_updates=55900, lr=0.0002675, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.1, wall=54625 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +epoch 034: 348 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=459116, ups=1.06, wpb=432785, bsz=16274.8, num_updates=56000, lr=0.000267261, gnorm=0.203, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.5, wall=54719 +begin validation on "valid" subset +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.227 | nll_loss 2.594 | ppl 6.04 | wps 0 | wpb 42662 | bsz 2032 | num_updates 56000 | best_loss 4.213 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 448 / 1689 loss=4.052, nll_loss=2.42, ppl=5.35, wps=398818, ups=0.92, wpb=433564, bsz=16571.4, num_updates=56100, lr=0.000267023, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=19.8, wall=54828 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 549 / 1689 loss=4.045, nll_loss=2.413, ppl=5.33, wps=455540, ups=1.05, wpb=433581, bsz=16550.4, num_updates=56200, lr=0.000266785, gnorm=0.216, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=54923 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 649 / 1689 loss=4.058, nll_loss=2.428, ppl=5.38, wps=461591, ups=1.06, wpb=433440, bsz=16686.6, num_updates=56300, lr=0.000266548, gnorm=0.22, clip=0, loss_scale=0.5, train_wall=93, gb_free=18.9, wall=55017 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 749 / 1689 loss=4.046, nll_loss=2.414, ppl=5.33, wps=462366, ups=1.07, wpb=433395, bsz=16686.5, num_updates=56400, lr=0.000266312, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=92, gb_free=18.3, wall=55110 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 849 / 1689 loss=4.053, nll_loss=2.422, ppl=5.36, wps=465393, ups=1.07, wpb=433254, bsz=16435.8, num_updates=56500, lr=0.000266076, gnorm=0.213, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.5, wall=55203 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 949 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=463499, ups=1.07, wpb=433348, bsz=16601.4, num_updates=56600, lr=0.000265841, gnorm=0.217, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=55297 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1050 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=457286, ups=1.06, wpb=432728, bsz=16151.8, num_updates=56700, lr=0.000265606, gnorm=0.201, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.6, wall=55392 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1150 / 1689 loss=4.052, nll_loss=2.422, ppl=5.36, wps=458972, ups=1.06, wpb=431595, bsz=16640.4, num_updates=56800, lr=0.000265372, gnorm=0.218, clip=0, loss_scale=0.5, train_wall=93, gb_free=19, wall=55486 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1250 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=463365, ups=1.07, wpb=433276, bsz=16662.8, num_updates=56900, lr=0.000265139, gnorm=0.223, clip=0, loss_scale=0.5, train_wall=92, gb_free=20.4, wall=55579 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +epoch 034: 1350 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=462946, ups=1.07, wpb=434664, bsz=16697, num_updates=57000, lr=0.000264906, gnorm=0.198, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=55673 +begin validation on "valid" subset +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034 | valid on 'valid' subset | loss 4.221 | nll_loss 2.591 | ppl 6.03 | wps 0 | wpb 42662 | bsz 2032 | num_updates 57000 | best_loss 4.213 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1450 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=351081, ups=0.81, wpb=434948, bsz=16337, num_updates=57100, lr=0.000264674, gnorm=0.204, clip=0, loss_scale=0.5, train_wall=99, gb_free=19.7, wall=55797 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1550 / 1689 loss=4.066, nll_loss=2.438, ppl=5.42, wps=469595, ups=1.08, wpb=436546, bsz=16405.1, num_updates=57200, lr=0.000264443, gnorm=0.227, clip=0, loss_scale=1, train_wall=92, gb_free=19.5, wall=55890 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +epoch 034: 1650 / 1689 loss=4.064, nll_loss=2.435, ppl=5.41, wps=467873, ups=1.07, wpb=435342, bsz=16336.1, num_updates=57300, lr=0.000264212, gnorm=0.215, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=55983 +end of epoch 34 (average epoch stats below) +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +epoch 034 | loss 4.053 | nll_loss 2.423 | ppl 5.36 | wps 449382 | ups 1.04 | wpb 433528 | bsz 16503 | num_updates 57339 | lr 0.000264122 | gnorm 0.212 | clip 0 | loss_scale 1 | train_wall 1569 | gb_free 20.2 | wall 56019 +Start iterating over samples +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 61 / 1689 loss=4.049, nll_loss=2.417, ppl=5.34, wps=460557, ups=1.07, wpb=431032, bsz=16328.6, num_updates=57400, lr=0.000263982, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56076 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 161 / 1689 loss=4.038, nll_loss=2.405, ppl=5.3, wps=462441, ups=1.07, wpb=432518, bsz=16395.4, num_updates=57500, lr=0.000263752, gnorm=0.198, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=56170 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 261 / 1689 loss=4.039, nll_loss=2.406, ppl=5.3, wps=462538, ups=1.07, wpb=433531, bsz=16588.6, num_updates=57600, lr=0.000263523, gnorm=0.217, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56264 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 361 / 1689 loss=4.035, nll_loss=2.402, ppl=5.29, wps=464510, ups=1.07, wpb=434708, bsz=16591.1, num_updates=57700, lr=0.000263295, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=19.1, wall=56357 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 462 / 1689 loss=4.056, nll_loss=2.425, ppl=5.37, wps=458398, ups=1.05, wpb=435102, bsz=16427, num_updates=57800, lr=0.000263067, gnorm=0.21, clip=0, loss_scale=1, train_wall=93, gb_free=18.8, wall=56452 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 562 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=466764, ups=1.07, wpb=435033, bsz=16343.9, num_updates=57900, lr=0.00026284, gnorm=0.208, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=56545 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +epoch 035: 662 / 1689 loss=4.049, nll_loss=2.418, ppl=5.34, wps=463468, ups=1.07, wpb=433846, bsz=16407, num_updates=58000, lr=0.000262613, gnorm=0.214, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=56639 +begin validation on "valid" subset +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.231 | nll_loss 2.602 | ppl 6.07 | wps 0 | wpb 42662 | bsz 2032 | num_updates 58000 | best_loss 4.213 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 762 / 1689 loss=4.054, nll_loss=2.424, ppl=5.37, wps=409443, ups=0.94, wpb=436065, bsz=16888.2, num_updates=58100, lr=0.000262387, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=18.8, wall=56746 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 862 / 1689 loss=4.042, nll_loss=2.41, ppl=5.31, wps=464287, ups=1.08, wpb=431863, bsz=16660.3, num_updates=58200, lr=0.000262161, gnorm=0.199, clip=0, loss_scale=1, train_wall=91, gb_free=18.5, wall=56839 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 964 / 1689 loss=4.048, nll_loss=2.417, ppl=5.34, wps=451463, ups=1.04, wpb=433138, bsz=16619.9, num_updates=58300, lr=0.000261936, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=94, gb_free=19.3, wall=56935 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1064 / 1689 loss=4.06, nll_loss=2.43, ppl=5.39, wps=461572, ups=1.06, wpb=435021, bsz=16279.2, num_updates=58400, lr=0.000261712, gnorm=0.207, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.1, wall=57029 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1164 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=458880, ups=1.06, wpb=432498, bsz=16291.2, num_updates=58500, lr=0.000261488, gnorm=0.211, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.6, wall=57123 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1264 / 1689 loss=4.049, nll_loss=2.419, ppl=5.35, wps=460589, ups=1.07, wpb=430389, bsz=16251.6, num_updates=58600, lr=0.000261265, gnorm=0.205, clip=0, loss_scale=0.5, train_wall=92, gb_free=19.7, wall=57216 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1364 / 1689 loss=4.047, nll_loss=2.417, ppl=5.34, wps=458856, ups=1.06, wpb=433245, bsz=16362.8, num_updates=58700, lr=0.000261042, gnorm=0.214, clip=0, loss_scale=0.5, train_wall=93, gb_free=19.4, wall=57311 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1464 / 1689 loss=4.063, nll_loss=2.434, ppl=5.4, wps=461287, ups=1.06, wpb=435487, bsz=16626.8, num_updates=58800, lr=0.00026082, gnorm=0.208, clip=0, loss_scale=1, train_wall=93, gb_free=18.2, wall=57405 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1564 / 1689 loss=4.055, nll_loss=2.425, ppl=5.37, wps=462748, ups=1.07, wpb=434496, bsz=16851, num_updates=58900, lr=0.000260599, gnorm=0.216, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57499 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +epoch 035: 1664 / 1689 loss=4.062, nll_loss=2.433, ppl=5.4, wps=460357, ups=1.06, wpb=432312, bsz=16710.9, num_updates=59000, lr=0.000260378, gnorm=0.205, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=57593 +begin validation on "valid" subset +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +epoch 035 | valid on 'valid' subset | loss 4.216 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 59000 | best_loss 4.213 +end of epoch 35 (average epoch stats below) +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +epoch 035 | loss 4.05 | nll_loss 2.419 | ppl 5.35 | wps 446585 | ups 1.03 | wpb 433554 | bsz 16506.8 | num_updates 59025 | lr 0.000260323 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 1583 | gb_free 21.7 | wall 57655 +Start iterating over samples +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 75 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=325058, ups=0.75, wpb=431235, bsz=16576.7, num_updates=59100, lr=0.000260157, gnorm=0.214, clip=0, loss_scale=1, train_wall=114, gb_free=20.2, wall=57726 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 175 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=468785, ups=1.08, wpb=434327, bsz=16649.3, num_updates=59200, lr=0.000259938, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=21.4, wall=57818 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 275 / 1689 loss=4.031, nll_loss=2.397, ppl=5.27, wps=465814, ups=1.08, wpb=432258, bsz=16233.4, num_updates=59300, lr=0.000259718, gnorm=0.197, clip=0, loss_scale=2, train_wall=92, gb_free=19.1, wall=57911 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 376 / 1689 loss=4.037, nll_loss=2.404, ppl=5.29, wps=457925, ups=1.06, wpb=433492, bsz=16597.8, num_updates=59400, lr=0.0002595, gnorm=0.206, clip=0, loss_scale=1, train_wall=93, gb_free=19.4, wall=58006 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 476 / 1689 loss=4.04, nll_loss=2.408, ppl=5.31, wps=462036, ups=1.07, wpb=431837, bsz=16671.2, num_updates=59500, lr=0.000259281, gnorm=0.211, clip=0, loss_scale=1, train_wall=92, gb_free=18.9, wall=58099 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 576 / 1689 loss=4.035, nll_loss=2.402, ppl=5.28, wps=464333, ups=1.08, wpb=431282, bsz=16469, num_updates=59600, lr=0.000259064, gnorm=0.21, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58192 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 676 / 1689 loss=4.041, nll_loss=2.409, ppl=5.31, wps=462494, ups=1.07, wpb=431875, bsz=16179.5, num_updates=59700, lr=0.000258847, gnorm=0.2, clip=0, loss_scale=1, train_wall=92, gb_free=18.6, wall=58286 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 776 / 1689 loss=4.047, nll_loss=2.415, ppl=5.33, wps=465371, ups=1.07, wpb=433377, bsz=16310, num_updates=59800, lr=0.00025863, gnorm=0.204, clip=0, loss_scale=1, train_wall=92, gb_free=19.4, wall=58379 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 877 / 1689 loss=4.043, nll_loss=2.412, ppl=5.32, wps=458007, ups=1.05, wpb=434955, bsz=16383.8, num_updates=59900, lr=0.000258414, gnorm=0.226, clip=0, loss_scale=1, train_wall=94, gb_free=19.6, wall=58474 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +epoch 036: 977 / 1689 loss=4.044, nll_loss=2.412, ppl=5.32, wps=463676, ups=1.07, wpb=434046, bsz=16435.5, num_updates=60000, lr=0.000258199, gnorm=0.203, clip=0, loss_scale=1, train_wall=92, gb_free=19, wall=58567 +Stopping training due to num_updates: 60000 >= max_update: 60000 +begin validation on "valid" subset +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +epoch 036 | valid on 'valid' subset | loss 4.217 | nll_loss 2.588 | ppl 6.01 | wps 0 | wpb 42662 | bsz 2032 | num_updates 60000 | best_loss 4.213 +end of epoch 36 (average epoch stats below) +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +epoch 036 | loss 4.038 | nll_loss 2.406 | ppl 5.3 | wps 456950 | ups 1.05 | wpb 433173 | bsz 16460.6 | num_updates 60000 | lr 0.000258199 | gnorm 0.208 | clip 0 | loss_scale 1 | train_wall 900 | gb_free 19 | wall 58580 +done training in 58569.2 seconds