{'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': 'simple', 'log_file': 'chkpt/ja-en/train.log', 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': 'wmt23', 'azureml_logging': False, 'seed': 0, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 8, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 8, 'distributed_num_procs': 8, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': 'tcp://localhost:13884', 'distributed_port': 13884, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'pytorch_ddp', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 8, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': 16384, 'batch_size': None, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 100000, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': 16384, 'batch_size_valid': None, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 60000, 'stop_time_hours': 0.0, 'clip_norm': 1.0, 'sentence_avg': False, 'update_freq': [4], 'lr': [0.001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False, 'debug_param_names': False}, 'checkpoint': {'_name': None, 'save_dir': 'chkpt/ja-en', 'restore_file': 'checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 100000, 'save_interval_updates': 1000, 'keep_interval_updates': 10, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 8}, 'generation': {'_name': None, 'beam': 5, 'beam_mt': 0, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'max_len_a_mt': 0.0, 'max_len_b_mt': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'lenpen_mt': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': Namespace(no_progress_bar=False, log_interval=100, log_format='simple', log_file='chkpt/ja-en/train.log', aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project='wmt23', azureml_logging=False, seed=0, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=8, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='label_smoothed_cross_entropy', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='inverse_sqrt', scoring='bleu', task='translation', num_workers=0, skip_invalid_size_inputs_valid_test=False, max_tokens=16384, batch_size=None, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=100000, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=16384, batch_size_valid=None, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, grouped_shuffling=False, update_epoch_batch_itr=False, update_ordered_indices_seed=False, distributed_world_size=8, distributed_num_procs=8, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, gradient_as_bucket_view=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_base_algorithm='localsgd', localsgd_frequency=3, nprocs_per_node=8, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, not_fsdp_flatten_parameters=False, arch='transformer_vaswani_wmt_en_de_big', max_epoch=0, max_update=60000, stop_time_hours=0, clip_norm=1.0, sentence_avg=False, update_freq=[4], lr=[0.001], stop_min_lr=-1.0, use_bmuf=False, skip_remainder_batch=False, debug_param_names=False, save_dir='chkpt/ja-en', restore_file='checkpoint_last.pt', continue_once=None, finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=100000, save_interval_updates=1000, keep_interval_updates=10, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=True, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, store_ema=False, ema_decay=0.9999, ema_start_update=0, ema_seed_model=None, ema_update_freq=1, ema_fp32=False, data='binarized/ja-en/', source_lang=None, target_lang=None, load_alignments=False, left_pad_source=True, left_pad_target=False, upsample_primary=-1, truncate_source=False, num_batch_buckets=0, eval_bleu=False, eval_bleu_args='{}', eval_bleu_detok='space', eval_bleu_detok_args='{}', eval_tokenized_bleu=False, eval_bleu_remove_bpe=None, eval_bleu_print_samples=False, label_smoothing=0.1, report_accuracy=False, ignore_prefix_size=0, adam_betas='(0.9, 0.98)', adam_eps=1e-08, weight_decay=0.0, use_old_adam=False, fp16_adam_stats=False, warmup_updates=4000, warmup_init_lr=-1, pad=1, eos=2, unk=3, encoder_ffn_embed_dim=8192, decoder_ffn_embed_dim=8192, dropout=0.1, share_decoder_input_output_embed=True, no_seed_provided=False, encoder_embed_dim=1024, encoder_attention_heads=16, encoder_normalize_before=False, decoder_embed_dim=1024, decoder_attention_heads=16, encoder_embed_path=None, encoder_layers=6, encoder_learned_pos=False, decoder_embed_path=None, decoder_layers=6, decoder_normalize_before=False, decoder_learned_pos=False, attention_dropout=0.0, activation_dropout=0.0, activation_fn='relu', adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, share_all_embeddings=False, merge_src_tgt_embed=False, no_token_positional_embeddings=False, adaptive_input=False, no_cross_attention=False, cross_self_attention=False, decoder_output_dim=1024, decoder_input_dim=1024, no_scale_embedding=False, layernorm_embedding=False, tie_adaptive_weights=False, checkpoint_activations=False, offload_activations=False, encoder_layers_to_keep=None, decoder_layers_to_keep=None, encoder_layerdrop=0, decoder_layerdrop=0, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, _name='transformer_vaswani_wmt_en_de_big'), 'task': {'_name': 'translation', 'data': 'binarized/ja-en/', 'source_lang': None, 'target_lang': None, 'load_alignments': False, 'left_pad_source': True, 'left_pad_target': False, 'max_source_positions': 1024, 'max_target_positions': 1024, 'upsample_primary': -1, 'truncate_source': False, 'num_batch_buckets': 0, 'train_subset': 'train', 'dataset_impl': None, 'required_seq_len_multiple': 1, 'eval_bleu': False, 'eval_bleu_args': '{}', 'eval_bleu_detok': 'space', 'eval_bleu_detok_args': '{}', 'eval_tokenized_bleu': False, 'eval_bleu_remove_bpe': None, 'eval_bleu_print_samples': False}, 'criterion': {'_name': 'label_smoothed_cross_entropy', 'label_smoothing': 0.1, 'report_accuracy': False, 'ignore_prefix_size': 0, 'sentence_avg': False}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9, 0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'fp16_adam_stats': False, 'tpu': False, 'lr': [0.001]}, 'lr_scheduler': {'_name': 'inverse_sqrt', 'warmup_updates': 4000, 'warmup_init_lr': -1.0, 'lr': [0.001]}, 'scoring': {'_name': 'bleu', 'pad': 1, 'eos': 2, 'unk': 3}, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}} TransformerModel( (encoder): TransformerEncoderBase( (dropout_module): FairseqDropout() (embed_tokens): Embedding(32000, 1024, padding_idx=1) (embed_positions): SinusoidalPositionalEmbedding() (layers): ModuleList( (0-5): 6 x TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=8192, bias=True) (fc2): Linear(in_features=8192, out_features=1024, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) ) ) ) (decoder): TransformerDecoderBase( (dropout_module): FairseqDropout() (embed_tokens): Embedding(16000, 1024, padding_idx=1) (embed_positions): SinusoidalPositionalEmbedding() (layers): ModuleList( (0-5): 6 x TransformerDecoderLayerBase( (dropout_module): FairseqDropout() (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (activation_dropout_module): FairseqDropout() (self_attn_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) (encoder_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (encoder_attn_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=8192, bias=True) (fc2): Linear(in_features=8192, out_features=1024, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) ) ) (output_projection): Linear(in_features=1024, out_features=16000, bias=False) ) ) task: TranslationTask model: TransformerModel criterion: LabelSmoothedCrossEntropyCriterion num. shared model params: 326,221,824 (num. trained: 326,221,824) num. expert model params: 0 (num. trained: 0) training on 8 devices (GPUs/TPUs) max tokens per device = 16384 and max sentences per device = None begin dry-run validation on "valid" subset Start iterating over samples epoch 001: 101 / 1689 loss=12.075, nll_loss=11.76, ppl=3468.25, wps=544310, ups=1.1, wpb=495063, bsz=16556.9, num_updates=100, lr=2.5e-05, gnorm=2.744, clip=85, loss_scale=4, train_wall=95, gb_free=21.5, wall=111 epoch 001: 201 / 1689 loss=10.468, nll_loss=9.919, ppl=968.11, wps=550513, ups=1.11, wpb=494772, bsz=16958.6, num_updates=200, lr=5e-05, gnorm=2.043, clip=97, loss_scale=4, train_wall=89, gb_free=21.6, wall=201 epoch 001: 301 / 1689 loss=9.727, nll_loss=9.042, ppl=527.05, wps=553007, ups=1.11, wpb=496328, bsz=16644.9, num_updates=300, lr=7.5e-05, gnorm=2.196, clip=100, loss_scale=4, train_wall=89, gb_free=21.8, wall=291 epoch 001: 401 / 1689 loss=9.118, nll_loss=8.322, ppl=320.01, wps=552994, ups=1.12, wpb=495021, bsz=16565.7, num_updates=400, lr=0.0001, gnorm=2.026, clip=100, loss_scale=4, train_wall=89, gb_free=22, wall=380 epoch 001: 501 / 1689 loss=8.644, nll_loss=7.762, ppl=217.08, wps=544686, ups=1.1, wpb=495038, bsz=16610.6, num_updates=500, lr=0.000125, gnorm=1.893, clip=100, loss_scale=4, train_wall=90, gb_free=21.9, wall=471 epoch 001: 601 / 1689 loss=8.251, nll_loss=7.3, ppl=157.62, wps=552081, ups=1.11, wpb=495492, bsz=16496.7, num_updates=600, lr=0.00015, gnorm=1.742, clip=100, loss_scale=8, train_wall=89, gb_free=19.9, wall=561 epoch 001: 701 / 1689 loss=7.827, nll_loss=6.808, ppl=112.01, wps=549814, ups=1.11, wpb=494852, bsz=16337.6, num_updates=700, lr=0.000175, gnorm=1.567, clip=100, loss_scale=8, train_wall=89, gb_free=22.3, wall=651 epoch 001: 801 / 1689 loss=7.423, nll_loss=6.338, ppl=80.92, wps=545621, ups=1.1, wpb=496124, bsz=16372.8, num_updates=800, lr=0.0002, gnorm=1.385, clip=97, loss_scale=8, train_wall=90, gb_free=21.9, wall=742 epoch 001: 901 / 1689 loss=7.037, nll_loss=5.893, ppl=59.41, wps=549273, ups=1.11, wpb=496813, bsz=16599.5, num_updates=900, lr=0.000225, gnorm=1.29, clip=98, loss_scale=8, train_wall=89, gb_free=21.9, wall=832 epoch 001: 1002 / 1689 loss=6.699, nll_loss=5.503, ppl=45.35, wps=538336, ups=1.08, wpb=496202, bsz=16673.8, num_updates=1000, lr=0.00025, gnorm=1.203, clip=91, loss_scale=4, train_wall=90, gb_free=21.8, wall=924 begin validation on "valid" subset epoch 001 | valid on 'valid' subset | loss 6.597 | nll_loss 5.352 | ppl 40.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 1000 epoch 001: 1102 / 1689 loss=6.359, nll_loss=5.113, ppl=34.6, wps=458231, ups=0.93, wpb=494438, bsz=16503.4, num_updates=1100, lr=0.000275, gnorm=1.132, clip=79, loss_scale=4, train_wall=90, gb_free=20.6, wall=1032 epoch 001: 1202 / 1689 loss=6.027, nll_loss=4.733, ppl=26.59, wps=546671, ups=1.1, wpb=494853, bsz=16237.2, num_updates=1200, lr=0.0003, gnorm=1.039, clip=58, loss_scale=4, train_wall=89, gb_free=22.1, wall=1123 epoch 001: 1302 / 1689 loss=5.727, nll_loss=4.39, ppl=20.97, wps=546651, ups=1.11, wpb=493588, bsz=16373.8, num_updates=1300, lr=0.000325, gnorm=0.955, clip=28, loss_scale=4, train_wall=89, gb_free=21.7, wall=1213 epoch 001: 1402 / 1689 loss=5.481, nll_loss=4.112, ppl=17.29, wps=549705, ups=1.11, wpb=497384, bsz=16279.9, num_updates=1400, lr=0.00035, gnorm=0.841, clip=20, loss_scale=4, train_wall=89, gb_free=22.2, wall=1304 epoch 001: 1502 / 1689 loss=5.281, nll_loss=3.888, ppl=14.81, wps=548137, ups=1.11, wpb=494548, bsz=16575.5, num_updates=1500, lr=0.000375, gnorm=0.757, clip=15, loss_scale=4, train_wall=88, gb_free=22.1, wall=1394 epoch 001: 1603 / 1689 loss=5.116, nll_loss=3.703, ppl=13.03, wps=537116, ups=1.08, wpb=495877, bsz=16534.4, num_updates=1600, lr=0.0004, gnorm=0.664, clip=8, loss_scale=4, train_wall=90, gb_free=21.8, wall=1486 end of epoch 1 (average epoch stats below) epoch 001 | loss 7.45 | nll_loss 6.389 | ppl 83.79 | wps 540899 | ups 1.09 | wpb 495135 | bsz 16503.6 | num_updates 1686 | lr 0.0004215 | gnorm 1.427 | clip 70.2 | loss_scale 4 | train_wall 1509 | gb_free 21.6 | wall 1563 Start iterating over samples epoch 002: 14 / 1689 loss=5.02, nll_loss=3.598, ppl=12.11, wps=540818, ups=1.1, wpb=490777, bsz=16322.4, num_updates=1700, lr=0.000425, gnorm=0.648, clip=7, loss_scale=4, train_wall=89, gb_free=22, wall=1577 epoch 002: 14 / 1689 loss=5.02, nll_loss=3.598, ppl=12.11, wps=540818, ups=1.1, wpb=490777, bsz=16322.4, num_updates=1700, lr=0.000425, gnorm=0.648, clip=7, loss_scale=4, train_wall=89, gb_free=22, wall=1577 epoch 002: 114 / 1689 loss=4.908, nll_loss=3.473, ppl=11.1, wps=552620, ups=1.11, wpb=496688, bsz=16760.9, num_updates=1800, lr=0.00045, gnorm=0.579, clip=6, loss_scale=4, train_wall=88, gb_free=21.8, wall=1667 epoch 002: 114 / 1689 loss=4.908, nll_loss=3.473, ppl=11.1, wps=552620, ups=1.11, wpb=496688, bsz=16760.9, num_updates=1800, lr=0.00045, gnorm=0.579, clip=6, loss_scale=4, train_wall=88, gb_free=21.8, wall=1667 epoch 002: 214 / 1689 loss=4.804, nll_loss=3.358, ppl=10.25, wps=550275, ups=1.11, wpb=494526, bsz=16524.7, num_updates=1900, lr=0.000475, gnorm=0.53, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=1757 epoch 002: 214 / 1689 loss=4.804, nll_loss=3.358, ppl=10.25, wps=550275, ups=1.11, wpb=494526, bsz=16524.7, num_updates=1900, lr=0.000475, gnorm=0.53, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=1757 epoch 002: 314 / 1689 loss=4.72, nll_loss=3.267, ppl=9.63, wps=545683, ups=1.1, wpb=494140, bsz=16833.4, num_updates=2000, lr=0.0005, gnorm=0.484, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=1847 epoch 002: 314 / 1689 loss=4.72, nll_loss=3.267, ppl=9.63, wps=545683, ups=1.1, wpb=494140, bsz=16833.4, num_updates=2000, lr=0.0005, gnorm=0.484, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=1847 begin validation on "valid" subset epoch 002 | valid on 'valid' subset | loss 4.752 | nll_loss 3.267 | ppl 9.63 | wps 0 | wpb 44526 | bsz 2008 | num_updates 2000 | best_loss 4.752 epoch 002 | valid on 'valid' subset | loss 4.752 | nll_loss 3.267 | ppl 9.63 | wps 0 | wpb 44526 | bsz 2008 | num_updates 2000 | best_loss 4.752 epoch 002: 414 / 1689 loss=4.652, nll_loss=3.192, ppl=9.14, wps=460354, ups=0.93, wpb=495220, bsz=16290.6, num_updates=2100, lr=0.000525, gnorm=0.486, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=1955 epoch 002: 414 / 1689 loss=4.652, nll_loss=3.192, ppl=9.14, wps=460354, ups=0.93, wpb=495220, bsz=16290.6, num_updates=2100, lr=0.000525, gnorm=0.486, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=1955 epoch 002: 515 / 1689 loss=4.631, nll_loss=3.171, ppl=9.01, wps=545018, ups=1.1, wpb=494819, bsz=16552.1, num_updates=2200, lr=0.00055, gnorm=0.507, clip=7, loss_scale=4, train_wall=89, gb_free=22, wall=2046 epoch 002: 515 / 1689 loss=4.631, nll_loss=3.171, ppl=9.01, wps=545018, ups=1.1, wpb=494819, bsz=16552.1, num_updates=2200, lr=0.00055, gnorm=0.507, clip=7, loss_scale=4, train_wall=89, gb_free=22, wall=2046 epoch 002: 615 / 1689 loss=4.521, nll_loss=3.05, ppl=8.28, wps=552670, ups=1.12, wpb=495521, bsz=16630.4, num_updates=2300, lr=0.000575, gnorm=0.405, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=2135 epoch 002: 615 / 1689 loss=4.521, nll_loss=3.05, ppl=8.28, wps=552670, ups=1.12, wpb=495521, bsz=16630.4, num_updates=2300, lr=0.000575, gnorm=0.405, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=2135 epoch 002: 715 / 1689 loss=4.493, nll_loss=3.02, ppl=8.11, wps=549099, ups=1.11, wpb=495264, bsz=16162, num_updates=2400, lr=0.0006, gnorm=0.432, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=2225 epoch 002: 715 / 1689 loss=4.493, nll_loss=3.02, ppl=8.11, wps=549099, ups=1.11, wpb=495264, bsz=16162, num_updates=2400, lr=0.0006, gnorm=0.432, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=2225 epoch 002: 815 / 1689 loss=4.437, nll_loss=2.959, ppl=7.78, wps=552450, ups=1.12, wpb=494583, bsz=16776.5, num_updates=2500, lr=0.000625, gnorm=0.387, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=2315 epoch 002: 815 / 1689 loss=4.437, nll_loss=2.959, ppl=7.78, wps=552450, ups=1.12, wpb=494583, bsz=16776.5, num_updates=2500, lr=0.000625, gnorm=0.387, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=2315 epoch 002: 915 / 1689 loss=4.41, nll_loss=2.93, ppl=7.62, wps=544771, ups=1.1, wpb=495507, bsz=16601.8, num_updates=2600, lr=0.00065, gnorm=0.398, clip=0, loss_scale=4, train_wall=89, gb_free=22.2, wall=2406 epoch 002: 915 / 1689 loss=4.41, nll_loss=2.93, ppl=7.62, wps=544771, ups=1.1, wpb=495507, bsz=16601.8, num_updates=2600, lr=0.00065, gnorm=0.398, clip=0, loss_scale=4, train_wall=89, gb_free=22.2, wall=2406 epoch 002: 1015 / 1689 loss=4.36, nll_loss=2.876, ppl=7.34, wps=549268, ups=1.11, wpb=495244, bsz=16349.9, num_updates=2700, lr=0.000675, gnorm=0.399, clip=0, loss_scale=8, train_wall=89, gb_free=21.8, wall=2496 epoch 002: 1015 / 1689 loss=4.36, nll_loss=2.876, ppl=7.34, wps=549268, ups=1.11, wpb=495244, bsz=16349.9, num_updates=2700, lr=0.000675, gnorm=0.399, clip=0, loss_scale=8, train_wall=89, gb_free=21.8, wall=2496 epoch 002: 1116 / 1689 loss=4.332, nll_loss=2.846, ppl=7.19, wps=543977, ups=1.1, wpb=495128, bsz=16391, num_updates=2800, lr=0.0007, gnorm=0.371, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=2587 epoch 002: 1116 / 1689 loss=4.332, nll_loss=2.846, ppl=7.19, wps=543977, ups=1.1, wpb=495128, bsz=16391, num_updates=2800, lr=0.0007, gnorm=0.371, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=2587 epoch 002: 1216 / 1689 loss=4.294, nll_loss=2.805, ppl=6.99, wps=554686, ups=1.12, wpb=496100, bsz=16472.5, num_updates=2900, lr=0.000725, gnorm=0.359, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=2677 epoch 002: 1216 / 1689 loss=4.294, nll_loss=2.805, ppl=6.99, wps=554686, ups=1.12, wpb=496100, bsz=16472.5, num_updates=2900, lr=0.000725, gnorm=0.359, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=2677 epoch 002: 1316 / 1689 loss=4.282, nll_loss=2.793, ppl=6.93, wps=555815, ups=1.12, wpb=496192, bsz=16642.6, num_updates=3000, lr=0.00075, gnorm=0.376, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=2766 epoch 002: 1316 / 1689 loss=4.282, nll_loss=2.793, ppl=6.93, wps=555815, ups=1.12, wpb=496192, bsz=16642.6, num_updates=3000, lr=0.00075, gnorm=0.376, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=2766 begin validation on "valid" subset epoch 002 | valid on 'valid' subset | loss 4.296 | nll_loss 2.773 | ppl 6.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 3000 | best_loss 4.296 epoch 002 | valid on 'valid' subset | loss 4.296 | nll_loss 2.773 | ppl 6.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 3000 | best_loss 4.296 epoch 002: 1416 / 1689 loss=4.238, nll_loss=2.744, ppl=6.7, wps=459500, ups=0.93, wpb=494703, bsz=16277.2, num_updates=3100, lr=0.000775, gnorm=0.349, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=2873 epoch 002: 1416 / 1689 loss=4.238, nll_loss=2.744, ppl=6.7, wps=459500, ups=0.93, wpb=494703, bsz=16277.2, num_updates=3100, lr=0.000775, gnorm=0.349, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=2873 epoch 002: 1516 / 1689 loss=4.221, nll_loss=2.727, ppl=6.62, wps=553266, ups=1.12, wpb=495594, bsz=16465.1, num_updates=3200, lr=0.0008, gnorm=0.347, clip=0, loss_scale=4, train_wall=88, gb_free=23, wall=2963 epoch 002: 1516 / 1689 loss=4.221, nll_loss=2.727, ppl=6.62, wps=553266, ups=1.12, wpb=495594, bsz=16465.1, num_updates=3200, lr=0.0008, gnorm=0.347, clip=0, loss_scale=4, train_wall=88, gb_free=23, wall=2963 epoch 002: 1617 / 1689 loss=4.196, nll_loss=2.7, ppl=6.5, wps=548668, ups=1.1, wpb=496950, bsz=16306.1, num_updates=3300, lr=0.000825, gnorm=0.346, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=3054 epoch 002: 1617 / 1689 loss=4.196, nll_loss=2.7, ppl=6.5, wps=548668, ups=1.1, wpb=496950, bsz=16306.1, num_updates=3300, lr=0.000825, gnorm=0.346, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=3054 end of epoch 2 (average epoch stats below) epoch 002 | loss 4.461 | nll_loss 2.986 | ppl 7.92 | wps 537114 | ups 1.08 | wpb 495113 | bsz 16504.7 | num_updates 3372 | lr 0.000843 | gnorm 0.421 | clip 0.8 | loss_scale 4 | train_wall 1491 | gb_free 25.7 | wall 3118 epoch 002 | loss 4.461 | nll_loss 2.986 | ppl 7.92 | wps 537114 | ups 1.08 | wpb 495113 | bsz 16504.7 | num_updates 3372 | lr 0.000843 | gnorm 0.421 | clip 0.8 | loss_scale 4 | train_wall 1491 | gb_free 25.7 | wall 3118 Start iterating over samples epoch 003: 28 / 1689 loss=4.197, nll_loss=2.701, ppl=6.5, wps=545918, ups=1.11, wpb=490460, bsz=16511.3, num_updates=3400, lr=0.00085, gnorm=0.385, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=3143 epoch 003: 28 / 1689 loss=4.197, nll_loss=2.701, ppl=6.5, wps=545918, ups=1.11, wpb=490460, bsz=16511.3, num_updates=3400, lr=0.00085, gnorm=0.385, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=3143 epoch 003: 28 / 1689 loss=4.197, nll_loss=2.701, ppl=6.5, wps=545918, ups=1.11, wpb=490460, bsz=16511.3, num_updates=3400, lr=0.00085, gnorm=0.385, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=3143 epoch 003: 128 / 1689 loss=4.139, nll_loss=2.637, ppl=6.22, wps=551584, ups=1.11, wpb=495133, bsz=16849.4, num_updates=3500, lr=0.000875, gnorm=0.312, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=3233 epoch 003: 128 / 1689 loss=4.139, nll_loss=2.637, ppl=6.22, wps=551584, ups=1.11, wpb=495133, bsz=16849.4, num_updates=3500, lr=0.000875, gnorm=0.312, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=3233 epoch 003: 128 / 1689 loss=4.139, nll_loss=2.637, ppl=6.22, wps=551584, ups=1.11, wpb=495133, bsz=16849.4, num_updates=3500, lr=0.000875, gnorm=0.312, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=3233 epoch 003: 228 / 1689 loss=4.141, nll_loss=2.64, ppl=6.23, wps=551737, ups=1.11, wpb=495438, bsz=16848.2, num_updates=3600, lr=0.0009, gnorm=0.34, clip=0, loss_scale=4, train_wall=88, gb_free=22.9, wall=3323 epoch 003: 228 / 1689 loss=4.141, nll_loss=2.64, ppl=6.23, wps=551737, ups=1.11, wpb=495438, bsz=16848.2, num_updates=3600, lr=0.0009, gnorm=0.34, clip=0, loss_scale=4, train_wall=88, gb_free=22.9, wall=3323 epoch 003: 228 / 1689 loss=4.141, nll_loss=2.64, ppl=6.23, wps=551737, ups=1.11, wpb=495438, bsz=16848.2, num_updates=3600, lr=0.0009, gnorm=0.34, clip=0, loss_scale=4, train_wall=88, gb_free=22.9, wall=3323 epoch 003: 328 / 1689 loss=4.13, nll_loss=2.629, ppl=6.19, wps=554509, ups=1.12, wpb=494814, bsz=16504.7, num_updates=3700, lr=0.000925, gnorm=0.339, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=3412 epoch 003: 328 / 1689 loss=4.13, nll_loss=2.629, ppl=6.19, wps=554509, ups=1.12, wpb=494814, bsz=16504.7, num_updates=3700, lr=0.000925, gnorm=0.339, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=3412 epoch 003: 328 / 1689 loss=4.13, nll_loss=2.629, ppl=6.19, wps=554509, ups=1.12, wpb=494814, bsz=16504.7, num_updates=3700, lr=0.000925, gnorm=0.339, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=3412 epoch 003: 429 / 1689 loss=4.126, nll_loss=2.625, ppl=6.17, wps=546409, ups=1.1, wpb=494762, bsz=16420.4, num_updates=3800, lr=0.00095, gnorm=0.344, clip=0, loss_scale=2, train_wall=89, gb_free=21.1, wall=3503 epoch 003: 429 / 1689 loss=4.126, nll_loss=2.625, ppl=6.17, wps=546409, ups=1.1, wpb=494762, bsz=16420.4, num_updates=3800, lr=0.00095, gnorm=0.344, clip=0, loss_scale=2, train_wall=89, gb_free=21.1, wall=3503 epoch 003: 429 / 1689 loss=4.126, nll_loss=2.625, ppl=6.17, wps=546409, ups=1.1, wpb=494762, bsz=16420.4, num_updates=3800, lr=0.00095, gnorm=0.344, clip=0, loss_scale=2, train_wall=89, gb_free=21.1, wall=3503 epoch 003: 529 / 1689 loss=4.108, nll_loss=2.606, ppl=6.09, wps=557242, ups=1.12, wpb=496084, bsz=16501, num_updates=3900, lr=0.000975, gnorm=0.349, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=3592 epoch 003: 529 / 1689 loss=4.108, nll_loss=2.606, ppl=6.09, wps=557242, ups=1.12, wpb=496084, bsz=16501, num_updates=3900, lr=0.000975, gnorm=0.349, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=3592 epoch 003: 529 / 1689 loss=4.108, nll_loss=2.606, ppl=6.09, wps=557242, ups=1.12, wpb=496084, bsz=16501, num_updates=3900, lr=0.000975, gnorm=0.349, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=3592 epoch 003: 629 / 1689 loss=4.095, nll_loss=2.592, ppl=6.03, wps=554091, ups=1.12, wpb=495658, bsz=16451.8, num_updates=4000, lr=0.001, gnorm=0.337, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=3681 epoch 003: 629 / 1689 loss=4.095, nll_loss=2.592, ppl=6.03, wps=554091, ups=1.12, wpb=495658, bsz=16451.8, num_updates=4000, lr=0.001, gnorm=0.337, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=3681 epoch 003: 629 / 1689 loss=4.095, nll_loss=2.592, ppl=6.03, wps=554091, ups=1.12, wpb=495658, bsz=16451.8, num_updates=4000, lr=0.001, gnorm=0.337, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=3681 begin validation on "valid" subset epoch 003 | valid on 'valid' subset | loss 4.167 | nll_loss 2.635 | ppl 6.21 | wps 0 | wpb 44526 | bsz 2008 | num_updates 4000 | best_loss 4.167 epoch 003 | valid on 'valid' subset | loss 4.167 | nll_loss 2.635 | ppl 6.21 | wps 0 | wpb 44526 | bsz 2008 | num_updates 4000 | best_loss 4.167 epoch 003 | valid on 'valid' subset | loss 4.167 | nll_loss 2.635 | ppl 6.21 | wps 0 | wpb 44526 | bsz 2008 | num_updates 4000 | best_loss 4.167 epoch 003: 729 / 1689 loss=4.09, nll_loss=2.586, ppl=6.01, wps=458043, ups=0.92, wpb=496213, bsz=16341, num_updates=4100, lr=0.00098773, gnorm=0.331, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=3790 epoch 003: 729 / 1689 loss=4.09, nll_loss=2.586, ppl=6.01, wps=458043, ups=0.92, wpb=496213, bsz=16341, num_updates=4100, lr=0.00098773, gnorm=0.331, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=3790 epoch 003: 729 / 1689 loss=4.09, nll_loss=2.586, ppl=6.01, wps=458043, ups=0.92, wpb=496213, bsz=16341, num_updates=4100, lr=0.00098773, gnorm=0.331, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=3790 epoch 003: 829 / 1689 loss=4.077, nll_loss=2.572, ppl=5.95, wps=548840, ups=1.11, wpb=493044, bsz=16311.8, num_updates=4200, lr=0.0009759, gnorm=0.316, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=3879 epoch 003: 829 / 1689 loss=4.077, nll_loss=2.572, ppl=5.95, wps=548840, ups=1.11, wpb=493044, bsz=16311.8, num_updates=4200, lr=0.0009759, gnorm=0.316, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=3879 epoch 003: 829 / 1689 loss=4.077, nll_loss=2.572, ppl=5.95, wps=548840, ups=1.11, wpb=493044, bsz=16311.8, num_updates=4200, lr=0.0009759, gnorm=0.316, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=3879 epoch 003: 929 / 1689 loss=4.063, nll_loss=2.558, ppl=5.89, wps=552946, ups=1.12, wpb=494194, bsz=16361.3, num_updates=4300, lr=0.000964486, gnorm=0.317, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=3969 epoch 003: 929 / 1689 loss=4.063, nll_loss=2.558, ppl=5.89, wps=552946, ups=1.12, wpb=494194, bsz=16361.3, num_updates=4300, lr=0.000964486, gnorm=0.317, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=3969 epoch 003: 929 / 1689 loss=4.063, nll_loss=2.558, ppl=5.89, wps=552946, ups=1.12, wpb=494194, bsz=16361.3, num_updates=4300, lr=0.000964486, gnorm=0.317, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=3969 epoch 003: 1029 / 1689 loss=4.056, nll_loss=2.551, ppl=5.86, wps=560618, ups=1.13, wpb=496554, bsz=16440.3, num_updates=4400, lr=0.000953463, gnorm=0.314, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=4057 epoch 003: 1029 / 1689 loss=4.056, nll_loss=2.551, ppl=5.86, wps=560618, ups=1.13, wpb=496554, bsz=16440.3, num_updates=4400, lr=0.000953463, gnorm=0.314, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=4057 epoch 003: 1029 / 1689 loss=4.056, nll_loss=2.551, ppl=5.86, wps=560618, ups=1.13, wpb=496554, bsz=16440.3, num_updates=4400, lr=0.000953463, gnorm=0.314, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=4057 epoch 003: 1129 / 1689 loss=4.033, nll_loss=2.527, ppl=5.76, wps=557439, ups=1.12, wpb=496113, bsz=16938.9, num_updates=4500, lr=0.000942809, gnorm=0.311, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=4146 epoch 003: 1129 / 1689 loss=4.033, nll_loss=2.527, ppl=5.76, wps=557439, ups=1.12, wpb=496113, bsz=16938.9, num_updates=4500, lr=0.000942809, gnorm=0.311, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=4146 epoch 003: 1129 / 1689 loss=4.033, nll_loss=2.527, ppl=5.76, wps=557439, ups=1.12, wpb=496113, bsz=16938.9, num_updates=4500, lr=0.000942809, gnorm=0.311, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=4146 epoch 003: 1229 / 1689 loss=4.029, nll_loss=2.521, ppl=5.74, wps=560372, ups=1.13, wpb=494048, bsz=16474.2, num_updates=4600, lr=0.000932505, gnorm=0.304, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=4235 epoch 003: 1229 / 1689 loss=4.029, nll_loss=2.521, ppl=5.74, wps=560372, ups=1.13, wpb=494048, bsz=16474.2, num_updates=4600, lr=0.000932505, gnorm=0.304, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=4235 epoch 003: 1229 / 1689 loss=4.029, nll_loss=2.521, ppl=5.74, wps=560372, ups=1.13, wpb=494048, bsz=16474.2, num_updates=4600, lr=0.000932505, gnorm=0.304, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=4235 epoch 003: 1329 / 1689 loss=4.018, nll_loss=2.51, ppl=5.69, wps=560982, ups=1.13, wpb=495822, bsz=16321, num_updates=4700, lr=0.000922531, gnorm=0.298, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=4323 epoch 003: 1329 / 1689 loss=4.018, nll_loss=2.51, ppl=5.69, wps=560982, ups=1.13, wpb=495822, bsz=16321, num_updates=4700, lr=0.000922531, gnorm=0.298, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=4323 epoch 003: 1329 / 1689 loss=4.018, nll_loss=2.51, ppl=5.69, wps=560982, ups=1.13, wpb=495822, bsz=16321, num_updates=4700, lr=0.000922531, gnorm=0.298, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=4323 epoch 003: 1430 / 1689 loss=4.002, nll_loss=2.493, ppl=5.63, wps=552205, ups=1.11, wpb=496349, bsz=16693, num_updates=4800, lr=0.000912871, gnorm=0.293, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=4413 epoch 003: 1430 / 1689 loss=4.002, nll_loss=2.493, ppl=5.63, wps=552205, ups=1.11, wpb=496349, bsz=16693, num_updates=4800, lr=0.000912871, gnorm=0.293, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=4413 epoch 003: 1430 / 1689 loss=4.002, nll_loss=2.493, ppl=5.63, wps=552205, ups=1.11, wpb=496349, bsz=16693, num_updates=4800, lr=0.000912871, gnorm=0.293, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=4413 epoch 003: 1530 / 1689 loss=3.991, nll_loss=2.481, ppl=5.58, wps=556066, ups=1.12, wpb=495935, bsz=16441.5, num_updates=4900, lr=0.000903508, gnorm=0.291, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=4502 epoch 003: 1530 / 1689 loss=3.991, nll_loss=2.481, ppl=5.58, wps=556066, ups=1.12, wpb=495935, bsz=16441.5, num_updates=4900, lr=0.000903508, gnorm=0.291, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=4502 epoch 003: 1530 / 1689 loss=3.991, nll_loss=2.481, ppl=5.58, wps=556066, ups=1.12, wpb=495935, bsz=16441.5, num_updates=4900, lr=0.000903508, gnorm=0.291, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=4502 epoch 003: 1630 / 1689 loss=3.987, nll_loss=2.476, ppl=5.57, wps=552247, ups=1.11, wpb=495488, bsz=16111.9, num_updates=5000, lr=0.000894427, gnorm=0.289, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=4592 epoch 003: 1630 / 1689 loss=3.987, nll_loss=2.476, ppl=5.57, wps=552247, ups=1.11, wpb=495488, bsz=16111.9, num_updates=5000, lr=0.000894427, gnorm=0.289, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=4592 epoch 003: 1630 / 1689 loss=3.987, nll_loss=2.476, ppl=5.57, wps=552247, ups=1.11, wpb=495488, bsz=16111.9, num_updates=5000, lr=0.000894427, gnorm=0.289, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=4592 begin validation on "valid" subset epoch 003 | valid on 'valid' subset | loss 4.02 | nll_loss 2.481 | ppl 5.58 | wps 0 | wpb 44526 | bsz 2008 | num_updates 5000 | best_loss 4.02 epoch 003 | valid on 'valid' subset | loss 4.02 | nll_loss 2.481 | ppl 5.58 | wps 0 | wpb 44526 | bsz 2008 | num_updates 5000 | best_loss 4.02 epoch 003 | valid on 'valid' subset | loss 4.02 | nll_loss 2.481 | ppl 5.58 | wps 0 | wpb 44526 | bsz 2008 | num_updates 5000 | best_loss 4.02 end of epoch 3 (average epoch stats below) epoch 003 | loss 4.067 | nll_loss 2.562 | ppl 5.91 | wps 529906 | ups 1.07 | wpb 495116 | bsz 16504.1 | num_updates 5059 | lr 0.000889196 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1513 | gb_free 23.3 | wall 4694 epoch 003 | loss 4.067 | nll_loss 2.562 | ppl 5.91 | wps 529906 | ups 1.07 | wpb 495116 | bsz 16504.1 | num_updates 5059 | lr 0.000889196 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1513 | gb_free 23.3 | wall 4694 epoch 003 | loss 4.067 | nll_loss 2.562 | ppl 5.91 | wps 529906 | ups 1.07 | wpb 495116 | bsz 16504.1 | num_updates 5059 | lr 0.000889196 | gnorm 0.319 | clip 0 | loss_scale 4 | train_wall 1513 | gb_free 23.3 | wall 4694 Start iterating over samples epoch 004: 41 / 1689 loss=3.962, nll_loss=2.449, ppl=5.46, wps=352544, ups=0.72, wpb=491687, bsz=16828.6, num_updates=5100, lr=0.000885615, gnorm=0.288, clip=0, loss_scale=4, train_wall=114, gb_free=22.4, wall=4731 epoch 004: 41 / 1689 loss=3.962, nll_loss=2.449, ppl=5.46, wps=352544, ups=0.72, wpb=491687, bsz=16828.6, num_updates=5100, lr=0.000885615, gnorm=0.288, clip=0, loss_scale=4, train_wall=114, gb_free=22.4, wall=4731 epoch 004: 41 / 1689 loss=3.962, nll_loss=2.449, ppl=5.46, wps=352544, ups=0.72, wpb=491687, bsz=16828.6, num_updates=5100, lr=0.000885615, gnorm=0.288, clip=0, loss_scale=4, train_wall=114, gb_free=22.4, wall=4731 epoch 004: 41 / 1689 loss=3.962, nll_loss=2.449, ppl=5.46, wps=352544, ups=0.72, wpb=491687, bsz=16828.6, num_updates=5100, lr=0.000885615, gnorm=0.288, clip=0, loss_scale=4, train_wall=114, gb_free=22.4, wall=4731 epoch 004: 141 / 1689 loss=3.945, nll_loss=2.43, ppl=5.39, wps=566346, ups=1.14, wpb=496024, bsz=16396.1, num_updates=5200, lr=0.000877058, gnorm=0.278, clip=0, loss_scale=4, train_wall=87, gb_free=22.1, wall=4819 epoch 004: 141 / 1689 loss=3.945, nll_loss=2.43, ppl=5.39, wps=566346, ups=1.14, wpb=496024, bsz=16396.1, num_updates=5200, lr=0.000877058, gnorm=0.278, clip=0, loss_scale=4, train_wall=87, gb_free=22.1, wall=4819 epoch 004: 141 / 1689 loss=3.945, nll_loss=2.43, ppl=5.39, wps=566346, ups=1.14, wpb=496024, bsz=16396.1, num_updates=5200, lr=0.000877058, gnorm=0.278, clip=0, loss_scale=4, train_wall=87, gb_free=22.1, wall=4819 epoch 004: 141 / 1689 loss=3.945, nll_loss=2.43, ppl=5.39, wps=566346, ups=1.14, wpb=496024, bsz=16396.1, num_updates=5200, lr=0.000877058, gnorm=0.278, clip=0, loss_scale=4, train_wall=87, gb_free=22.1, wall=4819 epoch 004: 242 / 1689 loss=3.938, nll_loss=2.423, ppl=5.36, wps=551120, ups=1.11, wpb=496338, bsz=16307.9, num_updates=5300, lr=0.000868744, gnorm=0.275, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=4909 epoch 004: 242 / 1689 loss=3.938, nll_loss=2.423, ppl=5.36, wps=551120, ups=1.11, wpb=496338, bsz=16307.9, num_updates=5300, lr=0.000868744, gnorm=0.275, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=4909 epoch 004: 242 / 1689 loss=3.938, nll_loss=2.423, ppl=5.36, wps=551120, ups=1.11, wpb=496338, bsz=16307.9, num_updates=5300, lr=0.000868744, gnorm=0.275, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=4909 epoch 004: 242 / 1689 loss=3.938, nll_loss=2.423, ppl=5.36, wps=551120, ups=1.11, wpb=496338, bsz=16307.9, num_updates=5300, lr=0.000868744, gnorm=0.275, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=4909 epoch 004: 342 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=553092, ups=1.12, wpb=493975, bsz=16735.7, num_updates=5400, lr=0.000860663, gnorm=0.284, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=4998 epoch 004: 342 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=553092, ups=1.12, wpb=493975, bsz=16735.7, num_updates=5400, lr=0.000860663, gnorm=0.284, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=4998 epoch 004: 342 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=553092, ups=1.12, wpb=493975, bsz=16735.7, num_updates=5400, lr=0.000860663, gnorm=0.284, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=4998 epoch 004: 342 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=553092, ups=1.12, wpb=493975, bsz=16735.7, num_updates=5400, lr=0.000860663, gnorm=0.284, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=4998 epoch 004: 442 / 1689 loss=3.923, nll_loss=2.407, ppl=5.3, wps=553277, ups=1.11, wpb=496824, bsz=16678.8, num_updates=5500, lr=0.000852803, gnorm=0.279, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=5088 epoch 004: 442 / 1689 loss=3.923, nll_loss=2.407, ppl=5.3, wps=553277, ups=1.11, wpb=496824, bsz=16678.8, num_updates=5500, lr=0.000852803, gnorm=0.279, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=5088 epoch 004: 442 / 1689 loss=3.923, nll_loss=2.407, ppl=5.3, wps=553277, ups=1.11, wpb=496824, bsz=16678.8, num_updates=5500, lr=0.000852803, gnorm=0.279, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=5088 epoch 004: 442 / 1689 loss=3.923, nll_loss=2.407, ppl=5.3, wps=553277, ups=1.11, wpb=496824, bsz=16678.8, num_updates=5500, lr=0.000852803, gnorm=0.279, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=5088 epoch 004: 542 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=555602, ups=1.12, wpb=495916, bsz=16423.7, num_updates=5600, lr=0.000845154, gnorm=0.272, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=5177 epoch 004: 542 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=555602, ups=1.12, wpb=495916, bsz=16423.7, num_updates=5600, lr=0.000845154, gnorm=0.272, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=5177 epoch 004: 542 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=555602, ups=1.12, wpb=495916, bsz=16423.7, num_updates=5600, lr=0.000845154, gnorm=0.272, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=5177 epoch 004: 542 / 1689 loss=3.932, nll_loss=2.417, ppl=5.34, wps=555602, ups=1.12, wpb=495916, bsz=16423.7, num_updates=5600, lr=0.000845154, gnorm=0.272, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=5177 epoch 004: 642 / 1689 loss=3.919, nll_loss=2.403, ppl=5.29, wps=550301, ups=1.11, wpb=494379, bsz=16563, num_updates=5700, lr=0.000837708, gnorm=0.278, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=5267 epoch 004: 642 / 1689 loss=3.919, nll_loss=2.403, ppl=5.29, wps=550301, ups=1.11, wpb=494379, bsz=16563, num_updates=5700, lr=0.000837708, gnorm=0.278, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=5267 epoch 004: 642 / 1689 loss=3.919, nll_loss=2.403, ppl=5.29, wps=550301, ups=1.11, wpb=494379, bsz=16563, num_updates=5700, lr=0.000837708, gnorm=0.278, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=5267 epoch 004: 642 / 1689 loss=3.919, nll_loss=2.403, ppl=5.29, wps=550301, ups=1.11, wpb=494379, bsz=16563, num_updates=5700, lr=0.000837708, gnorm=0.278, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=5267 epoch 004: 742 / 1689 loss=3.913, nll_loss=2.396, ppl=5.26, wps=554796, ups=1.12, wpb=495835, bsz=16531, num_updates=5800, lr=0.000830455, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5357 epoch 004: 742 / 1689 loss=3.913, nll_loss=2.396, ppl=5.26, wps=554796, ups=1.12, wpb=495835, bsz=16531, num_updates=5800, lr=0.000830455, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5357 epoch 004: 742 / 1689 loss=3.913, nll_loss=2.396, ppl=5.26, wps=554796, ups=1.12, wpb=495835, bsz=16531, num_updates=5800, lr=0.000830455, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5357 epoch 004: 742 / 1689 loss=3.913, nll_loss=2.396, ppl=5.26, wps=554796, ups=1.12, wpb=495835, bsz=16531, num_updates=5800, lr=0.000830455, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5357 epoch 004: 842 / 1689 loss=3.918, nll_loss=2.402, ppl=5.29, wps=553598, ups=1.12, wpb=495047, bsz=16309.8, num_updates=5900, lr=0.000823387, gnorm=0.261, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5446 epoch 004: 842 / 1689 loss=3.918, nll_loss=2.402, ppl=5.29, wps=553598, ups=1.12, wpb=495047, bsz=16309.8, num_updates=5900, lr=0.000823387, gnorm=0.261, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5446 epoch 004: 842 / 1689 loss=3.918, nll_loss=2.402, ppl=5.29, wps=553598, ups=1.12, wpb=495047, bsz=16309.8, num_updates=5900, lr=0.000823387, gnorm=0.261, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5446 epoch 004: 842 / 1689 loss=3.918, nll_loss=2.402, ppl=5.29, wps=553598, ups=1.12, wpb=495047, bsz=16309.8, num_updates=5900, lr=0.000823387, gnorm=0.261, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=5446 epoch 004: 942 / 1689 loss=3.899, nll_loss=2.382, ppl=5.21, wps=556581, ups=1.12, wpb=495867, bsz=16303.1, num_updates=6000, lr=0.000816497, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=5535 epoch 004: 942 / 1689 loss=3.899, nll_loss=2.382, ppl=5.21, wps=556581, ups=1.12, wpb=495867, bsz=16303.1, num_updates=6000, lr=0.000816497, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=5535 epoch 004: 942 / 1689 loss=3.899, nll_loss=2.382, ppl=5.21, wps=556581, ups=1.12, wpb=495867, bsz=16303.1, num_updates=6000, lr=0.000816497, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=5535 epoch 004: 942 / 1689 loss=3.899, nll_loss=2.382, ppl=5.21, wps=556581, ups=1.12, wpb=495867, bsz=16303.1, num_updates=6000, lr=0.000816497, gnorm=0.269, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=5535 begin validation on "valid" subset epoch 004 | valid on 'valid' subset | loss 3.964 | nll_loss 2.433 | ppl 5.4 | wps 0 | wpb 44526 | bsz 2008 | num_updates 6000 | best_loss 3.964 epoch 004 | valid on 'valid' subset | loss 3.964 | nll_loss 2.433 | ppl 5.4 | wps 0 | wpb 44526 | bsz 2008 | num_updates 6000 | best_loss 3.964 epoch 004 | valid on 'valid' subset | loss 3.964 | nll_loss 2.433 | ppl 5.4 | wps 0 | wpb 44526 | bsz 2008 | num_updates 6000 | best_loss 3.964 epoch 004 | valid on 'valid' subset | loss 3.964 | nll_loss 2.433 | ppl 5.4 | wps 0 | wpb 44526 | bsz 2008 | num_updates 6000 | best_loss 3.964 epoch 004: 1042 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=457357, ups=0.92, wpb=495105, bsz=16619.3, num_updates=6100, lr=0.000809776, gnorm=0.264, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=5643 epoch 004: 1042 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=457357, ups=0.92, wpb=495105, bsz=16619.3, num_updates=6100, lr=0.000809776, gnorm=0.264, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=5643 epoch 004: 1042 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=457357, ups=0.92, wpb=495105, bsz=16619.3, num_updates=6100, lr=0.000809776, gnorm=0.264, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=5643 epoch 004: 1042 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=457357, ups=0.92, wpb=495105, bsz=16619.3, num_updates=6100, lr=0.000809776, gnorm=0.264, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=5643 epoch 004: 1142 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=543669, ups=1.1, wpb=494956, bsz=16745.6, num_updates=6200, lr=0.000803219, gnorm=0.259, clip=0, loss_scale=4, train_wall=89, gb_free=21.2, wall=5734 epoch 004: 1142 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=543669, ups=1.1, wpb=494956, bsz=16745.6, num_updates=6200, lr=0.000803219, gnorm=0.259, clip=0, loss_scale=4, train_wall=89, gb_free=21.2, wall=5734 epoch 004: 1142 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=543669, ups=1.1, wpb=494956, bsz=16745.6, num_updates=6200, lr=0.000803219, gnorm=0.259, clip=0, loss_scale=4, train_wall=89, gb_free=21.2, wall=5734 epoch 004: 1142 / 1689 loss=3.895, nll_loss=2.378, ppl=5.2, wps=543669, ups=1.1, wpb=494956, bsz=16745.6, num_updates=6200, lr=0.000803219, gnorm=0.259, clip=0, loss_scale=4, train_wall=89, gb_free=21.2, wall=5734 epoch 004: 1242 / 1689 loss=3.889, nll_loss=2.372, ppl=5.18, wps=548912, ups=1.11, wpb=494927, bsz=16264.9, num_updates=6300, lr=0.000796819, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=5825 epoch 004: 1242 / 1689 loss=3.889, nll_loss=2.372, ppl=5.18, wps=548912, ups=1.11, wpb=494927, bsz=16264.9, num_updates=6300, lr=0.000796819, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=5825 epoch 004: 1242 / 1689 loss=3.889, nll_loss=2.372, ppl=5.18, wps=548912, ups=1.11, wpb=494927, bsz=16264.9, num_updates=6300, lr=0.000796819, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=5825 epoch 004: 1242 / 1689 loss=3.889, nll_loss=2.372, ppl=5.18, wps=548912, ups=1.11, wpb=494927, bsz=16264.9, num_updates=6300, lr=0.000796819, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=5825 epoch 004: 1343 / 1689 loss=3.887, nll_loss=2.369, ppl=5.17, wps=541446, ups=1.09, wpb=494640, bsz=16639.3, num_updates=6400, lr=0.000790569, gnorm=0.259, clip=0, loss_scale=4, train_wall=90, gb_free=21.9, wall=5916 epoch 004: 1343 / 1689 loss=3.887, nll_loss=2.369, ppl=5.17, wps=541446, ups=1.09, wpb=494640, bsz=16639.3, num_updates=6400, lr=0.000790569, gnorm=0.259, clip=0, loss_scale=4, train_wall=90, gb_free=21.9, wall=5916 epoch 004: 1343 / 1689 loss=3.887, nll_loss=2.369, ppl=5.17, wps=541446, ups=1.09, wpb=494640, bsz=16639.3, num_updates=6400, lr=0.000790569, gnorm=0.259, clip=0, loss_scale=4, train_wall=90, gb_free=21.9, wall=5916 epoch 004: 1343 / 1689 loss=3.887, nll_loss=2.369, ppl=5.17, wps=541446, ups=1.09, wpb=494640, bsz=16639.3, num_updates=6400, lr=0.000790569, gnorm=0.259, clip=0, loss_scale=4, train_wall=90, gb_free=21.9, wall=5916 epoch 004: 1443 / 1689 loss=3.88, nll_loss=2.362, ppl=5.14, wps=546900, ups=1.11, wpb=494315, bsz=16775.5, num_updates=6500, lr=0.000784465, gnorm=0.266, clip=0, loss_scale=4, train_wall=88, gb_free=23.1, wall=6006 epoch 004: 1443 / 1689 loss=3.88, nll_loss=2.362, ppl=5.14, wps=546900, ups=1.11, wpb=494315, bsz=16775.5, num_updates=6500, lr=0.000784465, gnorm=0.266, clip=0, loss_scale=4, train_wall=88, gb_free=23.1, wall=6006 epoch 004: 1443 / 1689 loss=3.88, nll_loss=2.362, ppl=5.14, wps=546900, ups=1.11, wpb=494315, bsz=16775.5, num_updates=6500, lr=0.000784465, gnorm=0.266, clip=0, loss_scale=4, train_wall=88, gb_free=23.1, wall=6006 epoch 004: 1443 / 1689 loss=3.88, nll_loss=2.362, ppl=5.14, wps=546900, ups=1.11, wpb=494315, bsz=16775.5, num_updates=6500, lr=0.000784465, gnorm=0.266, clip=0, loss_scale=4, train_wall=88, gb_free=23.1, wall=6006 epoch 004: 1543 / 1689 loss=3.875, nll_loss=2.356, ppl=5.12, wps=549704, ups=1.11, wpb=494708, bsz=16402, num_updates=6600, lr=0.000778499, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6096 epoch 004: 1543 / 1689 loss=3.875, nll_loss=2.356, ppl=5.12, wps=549704, ups=1.11, wpb=494708, bsz=16402, num_updates=6600, lr=0.000778499, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6096 epoch 004: 1543 / 1689 loss=3.875, nll_loss=2.356, ppl=5.12, wps=549704, ups=1.11, wpb=494708, bsz=16402, num_updates=6600, lr=0.000778499, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6096 epoch 004: 1543 / 1689 loss=3.875, nll_loss=2.356, ppl=5.12, wps=549704, ups=1.11, wpb=494708, bsz=16402, num_updates=6600, lr=0.000778499, gnorm=0.261, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6096 epoch 004: 1643 / 1689 loss=3.866, nll_loss=2.346, ppl=5.09, wps=551485, ups=1.11, wpb=496980, bsz=16207, num_updates=6700, lr=0.000772667, gnorm=0.246, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6186 epoch 004: 1643 / 1689 loss=3.866, nll_loss=2.346, ppl=5.09, wps=551485, ups=1.11, wpb=496980, bsz=16207, num_updates=6700, lr=0.000772667, gnorm=0.246, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6186 epoch 004: 1643 / 1689 loss=3.866, nll_loss=2.346, ppl=5.09, wps=551485, ups=1.11, wpb=496980, bsz=16207, num_updates=6700, lr=0.000772667, gnorm=0.246, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6186 epoch 004: 1643 / 1689 loss=3.866, nll_loss=2.346, ppl=5.09, wps=551485, ups=1.11, wpb=496980, bsz=16207, num_updates=6700, lr=0.000772667, gnorm=0.246, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=6186 end of epoch 4 (average epoch stats below) epoch 004 | loss 3.907 | nll_loss 2.39 | ppl 5.24 | wps 544830 | ups 1.1 | wpb 495127 | bsz 16501.5 | num_updates 6746 | lr 0.000770029 | gnorm 0.267 | clip 0 | loss_scale 4 | train_wall 1491 | gb_free 23.6 | wall 6227 epoch 004 | loss 3.907 | nll_loss 2.39 | ppl 5.24 | wps 544830 | ups 1.1 | wpb 495127 | bsz 16501.5 | num_updates 6746 | lr 0.000770029 | gnorm 0.267 | clip 0 | loss_scale 4 | train_wall 1491 | gb_free 23.6 | wall 6227 epoch 004 | loss 3.907 | nll_loss 2.39 | ppl 5.24 | wps 544830 | ups 1.1 | wpb 495127 | bsz 16501.5 | num_updates 6746 | lr 0.000770029 | gnorm 0.267 | clip 0 | loss_scale 4 | train_wall 1491 | gb_free 23.6 | wall 6227 epoch 004 | loss 3.907 | nll_loss 2.39 | ppl 5.24 | wps 544830 | ups 1.1 | wpb 495127 | bsz 16501.5 | num_updates 6746 | lr 0.000770029 | gnorm 0.267 | clip 0 | loss_scale 4 | train_wall 1491 | gb_free 23.6 | wall 6227 Start iterating over samples epoch 005: 54 / 1689 loss=3.851, nll_loss=2.329, ppl=5.02, wps=545457, ups=1.11, wpb=491068, bsz=16227.2, num_updates=6800, lr=0.000766965, gnorm=0.254, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=6276 epoch 005: 54 / 1689 loss=3.851, nll_loss=2.329, ppl=5.02, wps=545457, ups=1.11, wpb=491068, bsz=16227.2, num_updates=6800, lr=0.000766965, gnorm=0.254, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=6276 epoch 005: 54 / 1689 loss=3.851, nll_loss=2.329, ppl=5.02, wps=545457, ups=1.11, wpb=491068, bsz=16227.2, num_updates=6800, lr=0.000766965, gnorm=0.254, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=6276 epoch 005: 54 / 1689 loss=3.851, nll_loss=2.329, ppl=5.02, wps=545457, ups=1.11, wpb=491068, bsz=16227.2, num_updates=6800, lr=0.000766965, gnorm=0.254, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=6276 epoch 005: 54 / 1689 loss=3.851, nll_loss=2.329, ppl=5.02, wps=545457, ups=1.11, wpb=491068, bsz=16227.2, num_updates=6800, lr=0.000766965, gnorm=0.254, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=6276 epoch 005: 154 / 1689 loss=3.83, nll_loss=2.306, ppl=4.95, wps=555358, ups=1.12, wpb=494999, bsz=16482.5, num_updates=6900, lr=0.000761387, gnorm=0.249, clip=0, loss_scale=8, train_wall=88, gb_free=22.3, wall=6366 epoch 005: 154 / 1689 loss=3.83, nll_loss=2.306, ppl=4.95, wps=555358, ups=1.12, wpb=494999, bsz=16482.5, num_updates=6900, lr=0.000761387, gnorm=0.249, clip=0, loss_scale=8, train_wall=88, gb_free=22.3, wall=6366 epoch 005: 154 / 1689 loss=3.83, nll_loss=2.306, ppl=4.95, wps=555358, ups=1.12, wpb=494999, bsz=16482.5, num_updates=6900, lr=0.000761387, gnorm=0.249, clip=0, loss_scale=8, train_wall=88, gb_free=22.3, wall=6366 epoch 005: 154 / 1689 loss=3.83, nll_loss=2.306, ppl=4.95, wps=555358, ups=1.12, wpb=494999, bsz=16482.5, num_updates=6900, lr=0.000761387, gnorm=0.249, clip=0, loss_scale=8, train_wall=88, gb_free=22.3, wall=6366 epoch 005: 154 / 1689 loss=3.83, nll_loss=2.306, ppl=4.95, wps=555358, ups=1.12, wpb=494999, bsz=16482.5, num_updates=6900, lr=0.000761387, gnorm=0.249, clip=0, loss_scale=8, train_wall=88, gb_free=22.3, wall=6366 epoch 005: 254 / 1689 loss=3.824, nll_loss=2.299, ppl=4.92, wps=551669, ups=1.12, wpb=494600, bsz=16395, num_updates=7000, lr=0.000755929, gnorm=0.255, clip=0, loss_scale=8, train_wall=88, gb_free=20.9, wall=6455 epoch 005: 254 / 1689 loss=3.824, nll_loss=2.299, ppl=4.92, wps=551669, ups=1.12, wpb=494600, bsz=16395, num_updates=7000, lr=0.000755929, gnorm=0.255, clip=0, loss_scale=8, train_wall=88, gb_free=20.9, wall=6455 epoch 005: 254 / 1689 loss=3.824, nll_loss=2.299, ppl=4.92, wps=551669, ups=1.12, wpb=494600, bsz=16395, num_updates=7000, lr=0.000755929, gnorm=0.255, clip=0, loss_scale=8, train_wall=88, gb_free=20.9, wall=6455 epoch 005: 254 / 1689 loss=3.824, nll_loss=2.299, ppl=4.92, wps=551669, ups=1.12, wpb=494600, bsz=16395, num_updates=7000, lr=0.000755929, gnorm=0.255, clip=0, loss_scale=8, train_wall=88, gb_free=20.9, wall=6455 epoch 005: 254 / 1689 loss=3.824, nll_loss=2.299, ppl=4.92, wps=551669, ups=1.12, wpb=494600, bsz=16395, num_updates=7000, lr=0.000755929, gnorm=0.255, clip=0, loss_scale=8, train_wall=88, gb_free=20.9, wall=6455 begin validation on "valid" subset epoch 005 | valid on 'valid' subset | loss 3.897 | nll_loss 2.356 | ppl 5.12 | wps 0 | wpb 44526 | bsz 2008 | num_updates 7000 | best_loss 3.897 epoch 005 | valid on 'valid' subset | loss 3.897 | nll_loss 2.356 | ppl 5.12 | wps 0 | wpb 44526 | bsz 2008 | num_updates 7000 | best_loss 3.897 epoch 005 | valid on 'valid' subset | loss 3.897 | nll_loss 2.356 | ppl 5.12 | wps 0 | wpb 44526 | bsz 2008 | num_updates 7000 | best_loss 3.897 epoch 005 | valid on 'valid' subset | loss 3.897 | nll_loss 2.356 | ppl 5.12 | wps 0 | wpb 44526 | bsz 2008 | num_updates 7000 | best_loss 3.897 epoch 005 | valid on 'valid' subset | loss 3.897 | nll_loss 2.356 | ppl 5.12 | wps 0 | wpb 44526 | bsz 2008 | num_updates 7000 | best_loss 3.897 epoch 005: 354 / 1689 loss=3.834, nll_loss=2.311, ppl=4.96, wps=463812, ups=0.94, wpb=494951, bsz=16453, num_updates=7100, lr=0.000750587, gnorm=0.252, clip=0, loss_scale=8, train_wall=88, gb_free=22.6, wall=6562 epoch 005: 354 / 1689 loss=3.834, nll_loss=2.311, ppl=4.96, wps=463812, ups=0.94, wpb=494951, bsz=16453, num_updates=7100, lr=0.000750587, gnorm=0.252, clip=0, loss_scale=8, train_wall=88, gb_free=22.6, wall=6562 epoch 005: 354 / 1689 loss=3.834, nll_loss=2.311, ppl=4.96, wps=463812, ups=0.94, wpb=494951, bsz=16453, num_updates=7100, lr=0.000750587, gnorm=0.252, clip=0, loss_scale=8, train_wall=88, gb_free=22.6, wall=6562 epoch 005: 354 / 1689 loss=3.834, nll_loss=2.311, ppl=4.96, wps=463812, ups=0.94, wpb=494951, bsz=16453, num_updates=7100, lr=0.000750587, gnorm=0.252, clip=0, loss_scale=8, train_wall=88, gb_free=22.6, wall=6562 epoch 005: 354 / 1689 loss=3.834, nll_loss=2.311, ppl=4.96, wps=463812, ups=0.94, wpb=494951, bsz=16453, num_updates=7100, lr=0.000750587, gnorm=0.252, clip=0, loss_scale=8, train_wall=88, gb_free=22.6, wall=6562 epoch 005: 455 / 1689 loss=3.823, nll_loss=2.299, ppl=4.92, wps=544336, ups=1.1, wpb=496114, bsz=16696.9, num_updates=7200, lr=0.000745356, gnorm=0.236, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=6653 epoch 005: 455 / 1689 loss=3.823, nll_loss=2.299, ppl=4.92, wps=544336, ups=1.1, wpb=496114, bsz=16696.9, num_updates=7200, lr=0.000745356, gnorm=0.236, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=6653 epoch 005: 455 / 1689 loss=3.823, nll_loss=2.299, ppl=4.92, wps=544336, ups=1.1, wpb=496114, bsz=16696.9, num_updates=7200, lr=0.000745356, gnorm=0.236, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=6653 epoch 005: 455 / 1689 loss=3.823, nll_loss=2.299, ppl=4.92, wps=544336, ups=1.1, wpb=496114, bsz=16696.9, num_updates=7200, lr=0.000745356, gnorm=0.236, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=6653 epoch 005: 455 / 1689 loss=3.823, nll_loss=2.299, ppl=4.92, wps=544336, ups=1.1, wpb=496114, bsz=16696.9, num_updates=7200, lr=0.000745356, gnorm=0.236, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=6653 epoch 005: 555 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=550696, ups=1.11, wpb=495952, bsz=16605.6, num_updates=7300, lr=0.000740233, gnorm=0.241, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=6743 epoch 005: 555 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=550696, ups=1.11, wpb=495952, bsz=16605.6, num_updates=7300, lr=0.000740233, gnorm=0.241, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=6743 epoch 005: 555 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=550696, ups=1.11, wpb=495952, bsz=16605.6, num_updates=7300, lr=0.000740233, gnorm=0.241, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=6743 epoch 005: 555 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=550696, ups=1.11, wpb=495952, bsz=16605.6, num_updates=7300, lr=0.000740233, gnorm=0.241, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=6743 epoch 005: 555 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=550696, ups=1.11, wpb=495952, bsz=16605.6, num_updates=7300, lr=0.000740233, gnorm=0.241, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=6743 epoch 005: 655 / 1689 loss=3.826, nll_loss=2.302, ppl=4.93, wps=552309, ups=1.11, wpb=495698, bsz=16362.2, num_updates=7400, lr=0.000735215, gnorm=0.249, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=6833 epoch 005: 655 / 1689 loss=3.826, nll_loss=2.302, ppl=4.93, wps=552309, ups=1.11, wpb=495698, bsz=16362.2, num_updates=7400, lr=0.000735215, gnorm=0.249, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=6833 epoch 005: 655 / 1689 loss=3.826, nll_loss=2.302, ppl=4.93, wps=552309, ups=1.11, wpb=495698, bsz=16362.2, num_updates=7400, lr=0.000735215, gnorm=0.249, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=6833 epoch 005: 655 / 1689 loss=3.826, nll_loss=2.302, ppl=4.93, wps=552309, ups=1.11, wpb=495698, bsz=16362.2, num_updates=7400, lr=0.000735215, gnorm=0.249, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=6833 epoch 005: 655 / 1689 loss=3.826, nll_loss=2.302, ppl=4.93, wps=552309, ups=1.11, wpb=495698, bsz=16362.2, num_updates=7400, lr=0.000735215, gnorm=0.249, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=6833 epoch 005: 755 / 1689 loss=3.829, nll_loss=2.306, ppl=4.95, wps=554816, ups=1.12, wpb=494323, bsz=16388.2, num_updates=7500, lr=0.000730297, gnorm=0.253, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=6922 epoch 005: 755 / 1689 loss=3.829, nll_loss=2.306, ppl=4.95, wps=554816, ups=1.12, wpb=494323, bsz=16388.2, num_updates=7500, lr=0.000730297, gnorm=0.253, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=6922 epoch 005: 755 / 1689 loss=3.829, nll_loss=2.306, ppl=4.95, wps=554816, ups=1.12, wpb=494323, bsz=16388.2, num_updates=7500, lr=0.000730297, gnorm=0.253, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=6922 epoch 005: 755 / 1689 loss=3.829, nll_loss=2.306, ppl=4.95, wps=554816, ups=1.12, wpb=494323, bsz=16388.2, num_updates=7500, lr=0.000730297, gnorm=0.253, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=6922 epoch 005: 755 / 1689 loss=3.829, nll_loss=2.306, ppl=4.95, wps=554816, ups=1.12, wpb=494323, bsz=16388.2, num_updates=7500, lr=0.000730297, gnorm=0.253, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=6922 epoch 005: 855 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=548416, ups=1.11, wpb=495428, bsz=16640, num_updates=7600, lr=0.000725476, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=7012 epoch 005: 855 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=548416, ups=1.11, wpb=495428, bsz=16640, num_updates=7600, lr=0.000725476, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=7012 epoch 005: 855 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=548416, ups=1.11, wpb=495428, bsz=16640, num_updates=7600, lr=0.000725476, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=7012 epoch 005: 855 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=548416, ups=1.11, wpb=495428, bsz=16640, num_updates=7600, lr=0.000725476, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=7012 epoch 005: 855 / 1689 loss=3.822, nll_loss=2.298, ppl=4.92, wps=548416, ups=1.11, wpb=495428, bsz=16640, num_updates=7600, lr=0.000725476, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=7012 epoch 005: 955 / 1689 loss=3.828, nll_loss=2.305, ppl=4.94, wps=550808, ups=1.11, wpb=495775, bsz=16443, num_updates=7700, lr=0.00072075, gnorm=0.248, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=7102 epoch 005: 955 / 1689 loss=3.828, nll_loss=2.305, ppl=4.94, wps=550808, ups=1.11, wpb=495775, bsz=16443, num_updates=7700, lr=0.00072075, gnorm=0.248, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=7102 epoch 005: 955 / 1689 loss=3.828, nll_loss=2.305, ppl=4.94, wps=550808, ups=1.11, wpb=495775, bsz=16443, num_updates=7700, lr=0.00072075, gnorm=0.248, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=7102 epoch 005: 955 / 1689 loss=3.828, nll_loss=2.305, ppl=4.94, wps=550808, ups=1.11, wpb=495775, bsz=16443, num_updates=7700, lr=0.00072075, gnorm=0.248, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=7102 epoch 005: 955 / 1689 loss=3.828, nll_loss=2.305, ppl=4.94, wps=550808, ups=1.11, wpb=495775, bsz=16443, num_updates=7700, lr=0.00072075, gnorm=0.248, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=7102 epoch 005: 1056 / 1689 loss=3.814, nll_loss=2.29, ppl=4.89, wps=547030, ups=1.1, wpb=497773, bsz=16646.4, num_updates=7800, lr=0.000716115, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=22.6, wall=7193 epoch 005: 1056 / 1689 loss=3.814, nll_loss=2.29, ppl=4.89, wps=547030, ups=1.1, wpb=497773, bsz=16646.4, num_updates=7800, lr=0.000716115, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=22.6, wall=7193 epoch 005: 1056 / 1689 loss=3.814, nll_loss=2.29, ppl=4.89, wps=547030, ups=1.1, wpb=497773, bsz=16646.4, num_updates=7800, lr=0.000716115, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=22.6, wall=7193 epoch 005: 1056 / 1689 loss=3.814, nll_loss=2.29, ppl=4.89, wps=547030, ups=1.1, wpb=497773, bsz=16646.4, num_updates=7800, lr=0.000716115, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=22.6, wall=7193 epoch 005: 1056 / 1689 loss=3.814, nll_loss=2.29, ppl=4.89, wps=547030, ups=1.1, wpb=497773, bsz=16646.4, num_updates=7800, lr=0.000716115, gnorm=0.239, clip=0, loss_scale=4, train_wall=89, gb_free=22.6, wall=7193 epoch 005: 1156 / 1689 loss=3.81, nll_loss=2.286, ppl=4.88, wps=552315, ups=1.12, wpb=494045, bsz=16677.1, num_updates=7900, lr=0.000711568, gnorm=0.239, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=7283 epoch 005: 1156 / 1689 loss=3.81, nll_loss=2.286, ppl=4.88, wps=552315, ups=1.12, wpb=494045, bsz=16677.1, num_updates=7900, lr=0.000711568, gnorm=0.239, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=7283 epoch 005: 1156 / 1689 loss=3.81, nll_loss=2.286, ppl=4.88, wps=552315, ups=1.12, wpb=494045, bsz=16677.1, num_updates=7900, lr=0.000711568, gnorm=0.239, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=7283 epoch 005: 1156 / 1689 loss=3.81, nll_loss=2.286, ppl=4.88, wps=552315, ups=1.12, wpb=494045, bsz=16677.1, num_updates=7900, lr=0.000711568, gnorm=0.239, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=7283 epoch 005: 1156 / 1689 loss=3.81, nll_loss=2.286, ppl=4.88, wps=552315, ups=1.12, wpb=494045, bsz=16677.1, num_updates=7900, lr=0.000711568, gnorm=0.239, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=7283 epoch 005: 1257 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=546015, ups=1.11, wpb=494066, bsz=16369, num_updates=8000, lr=0.000707107, gnorm=0.237, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=7373 epoch 005: 1257 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=546015, ups=1.11, wpb=494066, bsz=16369, num_updates=8000, lr=0.000707107, gnorm=0.237, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=7373 epoch 005: 1257 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=546015, ups=1.11, wpb=494066, bsz=16369, num_updates=8000, lr=0.000707107, gnorm=0.237, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=7373 epoch 005: 1257 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=546015, ups=1.11, wpb=494066, bsz=16369, num_updates=8000, lr=0.000707107, gnorm=0.237, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=7373 epoch 005: 1257 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=546015, ups=1.11, wpb=494066, bsz=16369, num_updates=8000, lr=0.000707107, gnorm=0.237, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=7373 begin validation on "valid" subset epoch 005 | valid on 'valid' subset | loss 3.859 | nll_loss 2.321 | ppl 5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 8000 | best_loss 3.859 epoch 005 | valid on 'valid' subset | loss 3.859 | nll_loss 2.321 | ppl 5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 8000 | best_loss 3.859 epoch 005 | valid on 'valid' subset | loss 3.859 | nll_loss 2.321 | ppl 5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 8000 | best_loss 3.859 epoch 005 | valid on 'valid' subset | loss 3.859 | nll_loss 2.321 | ppl 5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 8000 | best_loss 3.859 epoch 005 | valid on 'valid' subset | loss 3.859 | nll_loss 2.321 | ppl 5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 8000 | best_loss 3.859 epoch 005: 1357 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=460239, ups=0.93, wpb=494962, bsz=16785.4, num_updates=8100, lr=0.000702728, gnorm=0.247, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=7481 epoch 005: 1357 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=460239, ups=0.93, wpb=494962, bsz=16785.4, num_updates=8100, lr=0.000702728, gnorm=0.247, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=7481 epoch 005: 1357 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=460239, ups=0.93, wpb=494962, bsz=16785.4, num_updates=8100, lr=0.000702728, gnorm=0.247, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=7481 epoch 005: 1357 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=460239, ups=0.93, wpb=494962, bsz=16785.4, num_updates=8100, lr=0.000702728, gnorm=0.247, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=7481 epoch 005: 1357 / 1689 loss=3.811, nll_loss=2.287, ppl=4.88, wps=460239, ups=0.93, wpb=494962, bsz=16785.4, num_updates=8100, lr=0.000702728, gnorm=0.247, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=7481 epoch 005: 1457 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=552165, ups=1.11, wpb=495674, bsz=16477.2, num_updates=8200, lr=0.00069843, gnorm=0.233, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=7571 epoch 005: 1457 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=552165, ups=1.11, wpb=495674, bsz=16477.2, num_updates=8200, lr=0.00069843, gnorm=0.233, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=7571 epoch 005: 1457 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=552165, ups=1.11, wpb=495674, bsz=16477.2, num_updates=8200, lr=0.00069843, gnorm=0.233, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=7571 epoch 005: 1457 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=552165, ups=1.11, wpb=495674, bsz=16477.2, num_updates=8200, lr=0.00069843, gnorm=0.233, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=7571 epoch 005: 1457 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=552165, ups=1.11, wpb=495674, bsz=16477.2, num_updates=8200, lr=0.00069843, gnorm=0.233, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=7571 epoch 005: 1557 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=556029, ups=1.12, wpb=495932, bsz=16143.1, num_updates=8300, lr=0.00069421, gnorm=0.23, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=7660 epoch 005: 1557 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=556029, ups=1.12, wpb=495932, bsz=16143.1, num_updates=8300, lr=0.00069421, gnorm=0.23, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=7660 epoch 005: 1557 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=556029, ups=1.12, wpb=495932, bsz=16143.1, num_updates=8300, lr=0.00069421, gnorm=0.23, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=7660 epoch 005: 1557 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=556029, ups=1.12, wpb=495932, bsz=16143.1, num_updates=8300, lr=0.00069421, gnorm=0.23, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=7660 epoch 005: 1557 / 1689 loss=3.804, nll_loss=2.28, ppl=4.86, wps=556029, ups=1.12, wpb=495932, bsz=16143.1, num_updates=8300, lr=0.00069421, gnorm=0.23, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=7660 epoch 005: 1657 / 1689 loss=3.802, nll_loss=2.278, ppl=4.85, wps=548580, ups=1.11, wpb=495486, bsz=16830.4, num_updates=8400, lr=0.000690066, gnorm=0.248, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=7750 epoch 005: 1657 / 1689 loss=3.802, nll_loss=2.278, ppl=4.85, wps=548580, ups=1.11, wpb=495486, bsz=16830.4, num_updates=8400, lr=0.000690066, gnorm=0.248, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=7750 epoch 005: 1657 / 1689 loss=3.802, nll_loss=2.278, ppl=4.85, wps=548580, ups=1.11, wpb=495486, bsz=16830.4, num_updates=8400, lr=0.000690066, gnorm=0.248, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=7750 epoch 005: 1657 / 1689 loss=3.802, nll_loss=2.278, ppl=4.85, wps=548580, ups=1.11, wpb=495486, bsz=16830.4, num_updates=8400, lr=0.000690066, gnorm=0.248, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=7750 epoch 005: 1657 / 1689 loss=3.802, nll_loss=2.278, ppl=4.85, wps=548580, ups=1.11, wpb=495486, bsz=16830.4, num_updates=8400, lr=0.000690066, gnorm=0.248, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=7750 end of epoch 5 (average epoch stats below) epoch 005 | loss 3.819 | nll_loss 2.295 | ppl 4.91 | wps 538128 | ups 1.09 | wpb 495114 | bsz 16505.3 | num_updates 8432 | lr 0.000688755 | gnorm 0.244 | clip 0 | loss_scale 2 | train_wall 1488 | gb_free 22.5 | wall 7778 epoch 005 | loss 3.819 | nll_loss 2.295 | ppl 4.91 | wps 538128 | ups 1.09 | wpb 495114 | bsz 16505.3 | num_updates 8432 | lr 0.000688755 | gnorm 0.244 | clip 0 | loss_scale 2 | train_wall 1488 | gb_free 22.5 | wall 7778 epoch 005 | loss 3.819 | nll_loss 2.295 | ppl 4.91 | wps 538128 | ups 1.09 | wpb 495114 | bsz 16505.3 | num_updates 8432 | lr 0.000688755 | gnorm 0.244 | clip 0 | loss_scale 2 | train_wall 1488 | gb_free 22.5 | wall 7778 epoch 005 | loss 3.819 | nll_loss 2.295 | ppl 4.91 | wps 538128 | ups 1.09 | wpb 495114 | bsz 16505.3 | num_updates 8432 | lr 0.000688755 | gnorm 0.244 | clip 0 | loss_scale 2 | train_wall 1488 | gb_free 22.5 | wall 7778 epoch 005 | loss 3.819 | nll_loss 2.295 | ppl 4.91 | wps 538128 | ups 1.09 | wpb 495114 | bsz 16505.3 | num_updates 8432 | lr 0.000688755 | gnorm 0.244 | clip 0 | loss_scale 2 | train_wall 1488 | gb_free 22.5 | wall 7778 Start iterating over samples epoch 006: 68 / 1689 loss=3.769, nll_loss=2.239, ppl=4.72, wps=550569, ups=1.12, wpb=492620, bsz=16013.7, num_updates=8500, lr=0.000685994, gnorm=0.234, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=7840 epoch 006: 68 / 1689 loss=3.769, nll_loss=2.239, ppl=4.72, wps=550569, ups=1.12, wpb=492620, bsz=16013.7, num_updates=8500, lr=0.000685994, gnorm=0.234, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=7840 epoch 006: 68 / 1689 loss=3.769, nll_loss=2.239, ppl=4.72, wps=550569, ups=1.12, wpb=492620, bsz=16013.7, num_updates=8500, lr=0.000685994, gnorm=0.234, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=7840 epoch 006: 68 / 1689 loss=3.769, nll_loss=2.239, ppl=4.72, wps=550569, ups=1.12, wpb=492620, bsz=16013.7, num_updates=8500, lr=0.000685994, gnorm=0.234, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=7840 epoch 006: 68 / 1689 loss=3.769, nll_loss=2.239, ppl=4.72, wps=550569, ups=1.12, wpb=492620, bsz=16013.7, num_updates=8500, lr=0.000685994, gnorm=0.234, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=7840 epoch 006: 68 / 1689 loss=3.769, nll_loss=2.239, ppl=4.72, wps=550569, ups=1.12, wpb=492620, bsz=16013.7, num_updates=8500, lr=0.000685994, gnorm=0.234, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=7840 epoch 006: 168 / 1689 loss=3.764, nll_loss=2.234, ppl=4.7, wps=560208, ups=1.13, wpb=495533, bsz=16295.3, num_updates=8600, lr=0.000681994, gnorm=0.235, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=7928 epoch 006: 168 / 1689 loss=3.764, nll_loss=2.234, ppl=4.7, wps=560208, ups=1.13, wpb=495533, bsz=16295.3, num_updates=8600, lr=0.000681994, gnorm=0.235, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=7928 epoch 006: 168 / 1689 loss=3.764, nll_loss=2.234, ppl=4.7, wps=560208, ups=1.13, wpb=495533, bsz=16295.3, num_updates=8600, lr=0.000681994, gnorm=0.235, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=7928 epoch 006: 168 / 1689 loss=3.764, nll_loss=2.234, ppl=4.7, wps=560208, ups=1.13, wpb=495533, bsz=16295.3, num_updates=8600, lr=0.000681994, gnorm=0.235, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=7928 epoch 006: 168 / 1689 loss=3.764, nll_loss=2.234, ppl=4.7, wps=560208, ups=1.13, wpb=495533, bsz=16295.3, num_updates=8600, lr=0.000681994, gnorm=0.235, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=7928 epoch 006: 168 / 1689 loss=3.764, nll_loss=2.234, ppl=4.7, wps=560208, ups=1.13, wpb=495533, bsz=16295.3, num_updates=8600, lr=0.000681994, gnorm=0.235, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=7928 epoch 006: 268 / 1689 loss=3.758, nll_loss=2.228, ppl=4.68, wps=558261, ups=1.13, wpb=495847, bsz=16619.6, num_updates=8700, lr=0.000678064, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=8017 epoch 006: 268 / 1689 loss=3.758, nll_loss=2.228, ppl=4.68, wps=558261, ups=1.13, wpb=495847, bsz=16619.6, num_updates=8700, lr=0.000678064, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=8017 epoch 006: 268 / 1689 loss=3.758, nll_loss=2.228, ppl=4.68, wps=558261, ups=1.13, wpb=495847, bsz=16619.6, num_updates=8700, lr=0.000678064, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=8017 epoch 006: 268 / 1689 loss=3.758, nll_loss=2.228, ppl=4.68, wps=558261, ups=1.13, wpb=495847, bsz=16619.6, num_updates=8700, lr=0.000678064, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=8017 epoch 006: 268 / 1689 loss=3.758, nll_loss=2.228, ppl=4.68, wps=558261, ups=1.13, wpb=495847, bsz=16619.6, num_updates=8700, lr=0.000678064, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=8017 epoch 006: 268 / 1689 loss=3.758, nll_loss=2.228, ppl=4.68, wps=558261, ups=1.13, wpb=495847, bsz=16619.6, num_updates=8700, lr=0.000678064, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=8017 epoch 006: 368 / 1689 loss=3.77, nll_loss=2.241, ppl=4.73, wps=555667, ups=1.12, wpb=494280, bsz=16324.2, num_updates=8800, lr=0.0006742, gnorm=0.238, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8106 epoch 006: 368 / 1689 loss=3.77, nll_loss=2.241, ppl=4.73, wps=555667, ups=1.12, wpb=494280, bsz=16324.2, num_updates=8800, lr=0.0006742, gnorm=0.238, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8106 epoch 006: 368 / 1689 loss=3.77, nll_loss=2.241, ppl=4.73, wps=555667, ups=1.12, wpb=494280, bsz=16324.2, num_updates=8800, lr=0.0006742, gnorm=0.238, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8106 epoch 006: 368 / 1689 loss=3.77, nll_loss=2.241, ppl=4.73, wps=555667, ups=1.12, wpb=494280, bsz=16324.2, num_updates=8800, lr=0.0006742, gnorm=0.238, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8106 epoch 006: 368 / 1689 loss=3.77, nll_loss=2.241, ppl=4.73, wps=555667, ups=1.12, wpb=494280, bsz=16324.2, num_updates=8800, lr=0.0006742, gnorm=0.238, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8106 epoch 006: 368 / 1689 loss=3.77, nll_loss=2.241, ppl=4.73, wps=555667, ups=1.12, wpb=494280, bsz=16324.2, num_updates=8800, lr=0.0006742, gnorm=0.238, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8106 epoch 006: 468 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549761, ups=1.11, wpb=496041, bsz=16477.8, num_updates=8900, lr=0.000670402, gnorm=0.243, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=8196 epoch 006: 468 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549761, ups=1.11, wpb=496041, bsz=16477.8, num_updates=8900, lr=0.000670402, gnorm=0.243, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=8196 epoch 006: 468 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549761, ups=1.11, wpb=496041, bsz=16477.8, num_updates=8900, lr=0.000670402, gnorm=0.243, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=8196 epoch 006: 468 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549761, ups=1.11, wpb=496041, bsz=16477.8, num_updates=8900, lr=0.000670402, gnorm=0.243, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=8196 epoch 006: 468 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549761, ups=1.11, wpb=496041, bsz=16477.8, num_updates=8900, lr=0.000670402, gnorm=0.243, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=8196 epoch 006: 468 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549761, ups=1.11, wpb=496041, bsz=16477.8, num_updates=8900, lr=0.000670402, gnorm=0.243, clip=0, loss_scale=4, train_wall=89, gb_free=21.5, wall=8196 epoch 006: 569 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549239, ups=1.11, wpb=495796, bsz=16601.1, num_updates=9000, lr=0.000666667, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=8286 epoch 006: 569 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549239, ups=1.11, wpb=495796, bsz=16601.1, num_updates=9000, lr=0.000666667, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=8286 epoch 006: 569 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549239, ups=1.11, wpb=495796, bsz=16601.1, num_updates=9000, lr=0.000666667, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=8286 epoch 006: 569 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549239, ups=1.11, wpb=495796, bsz=16601.1, num_updates=9000, lr=0.000666667, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=8286 epoch 006: 569 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549239, ups=1.11, wpb=495796, bsz=16601.1, num_updates=9000, lr=0.000666667, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=8286 epoch 006: 569 / 1689 loss=3.764, nll_loss=2.235, ppl=4.71, wps=549239, ups=1.11, wpb=495796, bsz=16601.1, num_updates=9000, lr=0.000666667, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.9, wall=8286 begin validation on "valid" subset epoch 006 | valid on 'valid' subset | loss 3.842 | nll_loss 2.301 | ppl 4.93 | wps 0 | wpb 44526 | bsz 2008 | num_updates 9000 | best_loss 3.842 epoch 006 | valid on 'valid' subset | loss 3.842 | nll_loss 2.301 | ppl 4.93 | wps 0 | wpb 44526 | bsz 2008 | num_updates 9000 | best_loss 3.842 epoch 006 | valid on 'valid' subset | loss 3.842 | nll_loss 2.301 | ppl 4.93 | wps 0 | wpb 44526 | bsz 2008 | num_updates 9000 | best_loss 3.842 epoch 006 | valid on 'valid' subset | loss 3.842 | nll_loss 2.301 | ppl 4.93 | wps 0 | wpb 44526 | bsz 2008 | num_updates 9000 | best_loss 3.842 epoch 006 | valid on 'valid' subset | loss 3.842 | nll_loss 2.301 | ppl 4.93 | wps 0 | wpb 44526 | bsz 2008 | num_updates 9000 | best_loss 3.842 epoch 006 | valid on 'valid' subset | loss 3.842 | nll_loss 2.301 | ppl 4.93 | wps 0 | wpb 44526 | bsz 2008 | num_updates 9000 | best_loss 3.842 epoch 006: 669 / 1689 loss=3.764, nll_loss=2.236, ppl=4.71, wps=420158, ups=0.85, wpb=493916, bsz=16854.6, num_updates=9100, lr=0.000662994, gnorm=0.23, clip=0, loss_scale=4, train_wall=94, gb_free=21.4, wall=8404 epoch 006: 669 / 1689 loss=3.764, nll_loss=2.236, ppl=4.71, wps=420158, ups=0.85, wpb=493916, bsz=16854.6, num_updates=9100, lr=0.000662994, gnorm=0.23, clip=0, loss_scale=4, train_wall=94, gb_free=21.4, wall=8404 epoch 006: 669 / 1689 loss=3.764, nll_loss=2.236, ppl=4.71, wps=420158, ups=0.85, wpb=493916, bsz=16854.6, num_updates=9100, lr=0.000662994, gnorm=0.23, clip=0, loss_scale=4, train_wall=94, gb_free=21.4, wall=8404 epoch 006: 669 / 1689 loss=3.764, nll_loss=2.236, ppl=4.71, wps=420158, ups=0.85, wpb=493916, bsz=16854.6, num_updates=9100, lr=0.000662994, gnorm=0.23, clip=0, loss_scale=4, train_wall=94, gb_free=21.4, wall=8404 epoch 006: 669 / 1689 loss=3.764, nll_loss=2.236, ppl=4.71, wps=420158, ups=0.85, wpb=493916, bsz=16854.6, num_updates=9100, lr=0.000662994, gnorm=0.23, clip=0, loss_scale=4, train_wall=94, gb_free=21.4, wall=8404 epoch 006: 669 / 1689 loss=3.764, nll_loss=2.236, ppl=4.71, wps=420158, ups=0.85, wpb=493916, bsz=16854.6, num_updates=9100, lr=0.000662994, gnorm=0.23, clip=0, loss_scale=4, train_wall=94, gb_free=21.4, wall=8404 epoch 006: 769 / 1689 loss=3.765, nll_loss=2.236, ppl=4.71, wps=553442, ups=1.12, wpb=495923, bsz=16416.4, num_updates=9200, lr=0.00065938, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=20.9, wall=8494 epoch 006: 769 / 1689 loss=3.765, nll_loss=2.236, ppl=4.71, wps=553442, ups=1.12, wpb=495923, bsz=16416.4, num_updates=9200, lr=0.00065938, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=20.9, wall=8494 epoch 006: 769 / 1689 loss=3.765, nll_loss=2.236, ppl=4.71, wps=553442, ups=1.12, wpb=495923, bsz=16416.4, num_updates=9200, lr=0.00065938, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=20.9, wall=8494 epoch 006: 769 / 1689 loss=3.765, nll_loss=2.236, ppl=4.71, wps=553442, ups=1.12, wpb=495923, bsz=16416.4, num_updates=9200, lr=0.00065938, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=20.9, wall=8494 epoch 006: 769 / 1689 loss=3.765, nll_loss=2.236, ppl=4.71, wps=553442, ups=1.12, wpb=495923, bsz=16416.4, num_updates=9200, lr=0.00065938, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=20.9, wall=8494 epoch 006: 769 / 1689 loss=3.765, nll_loss=2.236, ppl=4.71, wps=553442, ups=1.12, wpb=495923, bsz=16416.4, num_updates=9200, lr=0.00065938, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=20.9, wall=8494 epoch 006: 869 / 1689 loss=3.767, nll_loss=2.239, ppl=4.72, wps=555960, ups=1.12, wpb=495153, bsz=16927.2, num_updates=9300, lr=0.000655826, gnorm=0.242, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=8583 epoch 006: 869 / 1689 loss=3.767, nll_loss=2.239, ppl=4.72, wps=555960, ups=1.12, wpb=495153, bsz=16927.2, num_updates=9300, lr=0.000655826, gnorm=0.242, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=8583 epoch 006: 869 / 1689 loss=3.767, nll_loss=2.239, ppl=4.72, wps=555960, ups=1.12, wpb=495153, bsz=16927.2, num_updates=9300, lr=0.000655826, gnorm=0.242, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=8583 epoch 006: 869 / 1689 loss=3.767, nll_loss=2.239, ppl=4.72, wps=555960, ups=1.12, wpb=495153, bsz=16927.2, num_updates=9300, lr=0.000655826, gnorm=0.242, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=8583 epoch 006: 869 / 1689 loss=3.767, nll_loss=2.239, ppl=4.72, wps=555960, ups=1.12, wpb=495153, bsz=16927.2, num_updates=9300, lr=0.000655826, gnorm=0.242, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=8583 epoch 006: 869 / 1689 loss=3.767, nll_loss=2.239, ppl=4.72, wps=555960, ups=1.12, wpb=495153, bsz=16927.2, num_updates=9300, lr=0.000655826, gnorm=0.242, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=8583 epoch 006: 969 / 1689 loss=3.768, nll_loss=2.241, ppl=4.73, wps=557313, ups=1.13, wpb=493241, bsz=16229.5, num_updates=9400, lr=0.000652328, gnorm=0.227, clip=0, loss_scale=4, train_wall=87, gb_free=22.4, wall=8671 epoch 006: 969 / 1689 loss=3.768, nll_loss=2.241, ppl=4.73, wps=557313, ups=1.13, wpb=493241, bsz=16229.5, num_updates=9400, lr=0.000652328, gnorm=0.227, clip=0, loss_scale=4, train_wall=87, gb_free=22.4, wall=8671 epoch 006: 969 / 1689 loss=3.768, nll_loss=2.241, ppl=4.73, wps=557313, ups=1.13, wpb=493241, bsz=16229.5, num_updates=9400, lr=0.000652328, gnorm=0.227, clip=0, loss_scale=4, train_wall=87, gb_free=22.4, wall=8671 epoch 006: 969 / 1689 loss=3.768, nll_loss=2.241, ppl=4.73, wps=557313, ups=1.13, wpb=493241, bsz=16229.5, num_updates=9400, lr=0.000652328, gnorm=0.227, clip=0, loss_scale=4, train_wall=87, gb_free=22.4, wall=8671 epoch 006: 969 / 1689 loss=3.768, nll_loss=2.241, ppl=4.73, wps=557313, ups=1.13, wpb=493241, bsz=16229.5, num_updates=9400, lr=0.000652328, gnorm=0.227, clip=0, loss_scale=4, train_wall=87, gb_free=22.4, wall=8671 epoch 006: 969 / 1689 loss=3.768, nll_loss=2.241, ppl=4.73, wps=557313, ups=1.13, wpb=493241, bsz=16229.5, num_updates=9400, lr=0.000652328, gnorm=0.227, clip=0, loss_scale=4, train_wall=87, gb_free=22.4, wall=8671 epoch 006: 1069 / 1689 loss=3.762, nll_loss=2.234, ppl=4.7, wps=551458, ups=1.12, wpb=494501, bsz=16483.8, num_updates=9500, lr=0.000648886, gnorm=0.231, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8761 epoch 006: 1069 / 1689 loss=3.762, nll_loss=2.234, ppl=4.7, wps=551458, ups=1.12, wpb=494501, bsz=16483.8, num_updates=9500, lr=0.000648886, gnorm=0.231, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8761 epoch 006: 1069 / 1689 loss=3.762, nll_loss=2.234, ppl=4.7, wps=551458, ups=1.12, wpb=494501, bsz=16483.8, num_updates=9500, lr=0.000648886, gnorm=0.231, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8761 epoch 006: 1069 / 1689 loss=3.762, nll_loss=2.234, ppl=4.7, wps=551458, ups=1.12, wpb=494501, bsz=16483.8, num_updates=9500, lr=0.000648886, gnorm=0.231, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8761 epoch 006: 1069 / 1689 loss=3.762, nll_loss=2.234, ppl=4.7, wps=551458, ups=1.12, wpb=494501, bsz=16483.8, num_updates=9500, lr=0.000648886, gnorm=0.231, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8761 epoch 006: 1069 / 1689 loss=3.762, nll_loss=2.234, ppl=4.7, wps=551458, ups=1.12, wpb=494501, bsz=16483.8, num_updates=9500, lr=0.000648886, gnorm=0.231, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=8761 epoch 006: 1170 / 1689 loss=3.76, nll_loss=2.232, ppl=4.7, wps=549752, ups=1.11, wpb=494492, bsz=16811.1, num_updates=9600, lr=0.000645497, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=8851 epoch 006: 1170 / 1689 loss=3.76, nll_loss=2.232, ppl=4.7, wps=549752, ups=1.11, wpb=494492, bsz=16811.1, num_updates=9600, lr=0.000645497, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=8851 epoch 006: 1170 / 1689 loss=3.76, nll_loss=2.232, ppl=4.7, wps=549752, ups=1.11, wpb=494492, bsz=16811.1, num_updates=9600, lr=0.000645497, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=8851 epoch 006: 1170 / 1689 loss=3.76, nll_loss=2.232, ppl=4.7, wps=549752, ups=1.11, wpb=494492, bsz=16811.1, num_updates=9600, lr=0.000645497, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=8851 epoch 006: 1170 / 1689 loss=3.76, nll_loss=2.232, ppl=4.7, wps=549752, ups=1.11, wpb=494492, bsz=16811.1, num_updates=9600, lr=0.000645497, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=8851 epoch 006: 1170 / 1689 loss=3.76, nll_loss=2.232, ppl=4.7, wps=549752, ups=1.11, wpb=494492, bsz=16811.1, num_updates=9600, lr=0.000645497, gnorm=0.227, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=8851 epoch 006: 1270 / 1689 loss=3.765, nll_loss=2.237, ppl=4.71, wps=558150, ups=1.13, wpb=495553, bsz=16170.1, num_updates=9700, lr=0.000642161, gnorm=0.227, clip=0, loss_scale=4, train_wall=88, gb_free=21.3, wall=8940 epoch 006: 1270 / 1689 loss=3.765, nll_loss=2.237, ppl=4.71, wps=558150, ups=1.13, wpb=495553, bsz=16170.1, num_updates=9700, lr=0.000642161, gnorm=0.227, clip=0, loss_scale=4, train_wall=88, gb_free=21.3, wall=8940 epoch 006: 1270 / 1689 loss=3.765, nll_loss=2.237, ppl=4.71, wps=558150, ups=1.13, wpb=495553, bsz=16170.1, num_updates=9700, lr=0.000642161, gnorm=0.227, clip=0, loss_scale=4, train_wall=88, gb_free=21.3, wall=8940 epoch 006: 1270 / 1689 loss=3.765, nll_loss=2.237, ppl=4.71, wps=558150, ups=1.13, wpb=495553, bsz=16170.1, num_updates=9700, lr=0.000642161, gnorm=0.227, clip=0, loss_scale=4, train_wall=88, gb_free=21.3, wall=8940 epoch 006: 1270 / 1689 loss=3.765, nll_loss=2.237, ppl=4.71, wps=558150, ups=1.13, wpb=495553, bsz=16170.1, num_updates=9700, lr=0.000642161, gnorm=0.227, clip=0, loss_scale=4, train_wall=88, gb_free=21.3, wall=8940 epoch 006: 1270 / 1689 loss=3.765, nll_loss=2.237, ppl=4.71, wps=558150, ups=1.13, wpb=495553, bsz=16170.1, num_updates=9700, lr=0.000642161, gnorm=0.227, clip=0, loss_scale=4, train_wall=88, gb_free=21.3, wall=8940 epoch 006: 1370 / 1689 loss=3.755, nll_loss=2.227, ppl=4.68, wps=559106, ups=1.12, wpb=497314, bsz=16725.4, num_updates=9800, lr=0.000638877, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=9028 epoch 006: 1370 / 1689 loss=3.755, nll_loss=2.227, ppl=4.68, wps=559106, ups=1.12, wpb=497314, bsz=16725.4, num_updates=9800, lr=0.000638877, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=9028 epoch 006: 1370 / 1689 loss=3.755, nll_loss=2.227, ppl=4.68, wps=559106, ups=1.12, wpb=497314, bsz=16725.4, num_updates=9800, lr=0.000638877, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=9028 epoch 006: 1370 / 1689 loss=3.755, nll_loss=2.227, ppl=4.68, wps=559106, ups=1.12, wpb=497314, bsz=16725.4, num_updates=9800, lr=0.000638877, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=9028 epoch 006: 1370 / 1689 loss=3.755, nll_loss=2.227, ppl=4.68, wps=559106, ups=1.12, wpb=497314, bsz=16725.4, num_updates=9800, lr=0.000638877, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=9028 epoch 006: 1370 / 1689 loss=3.755, nll_loss=2.227, ppl=4.68, wps=559106, ups=1.12, wpb=497314, bsz=16725.4, num_updates=9800, lr=0.000638877, gnorm=0.225, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=9028 epoch 006: 1470 / 1689 loss=3.763, nll_loss=2.235, ppl=4.71, wps=552369, ups=1.12, wpb=494850, bsz=16358.3, num_updates=9900, lr=0.000635642, gnorm=0.223, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=9118 epoch 006: 1470 / 1689 loss=3.763, nll_loss=2.235, ppl=4.71, wps=552369, ups=1.12, wpb=494850, bsz=16358.3, num_updates=9900, lr=0.000635642, gnorm=0.223, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=9118 epoch 006: 1470 / 1689 loss=3.763, nll_loss=2.235, ppl=4.71, wps=552369, ups=1.12, wpb=494850, bsz=16358.3, num_updates=9900, lr=0.000635642, gnorm=0.223, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=9118 epoch 006: 1470 / 1689 loss=3.763, nll_loss=2.235, ppl=4.71, wps=552369, ups=1.12, wpb=494850, bsz=16358.3, num_updates=9900, lr=0.000635642, gnorm=0.223, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=9118 epoch 006: 1470 / 1689 loss=3.763, nll_loss=2.235, ppl=4.71, wps=552369, ups=1.12, wpb=494850, bsz=16358.3, num_updates=9900, lr=0.000635642, gnorm=0.223, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=9118 epoch 006: 1470 / 1689 loss=3.763, nll_loss=2.235, ppl=4.71, wps=552369, ups=1.12, wpb=494850, bsz=16358.3, num_updates=9900, lr=0.000635642, gnorm=0.223, clip=0, loss_scale=4, train_wall=89, gb_free=22.3, wall=9118 epoch 006: 1570 / 1689 loss=3.752, nll_loss=2.223, ppl=4.67, wps=553402, ups=1.12, wpb=495856, bsz=16750.3, num_updates=10000, lr=0.000632456, gnorm=0.219, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9208 epoch 006: 1570 / 1689 loss=3.752, nll_loss=2.223, ppl=4.67, wps=553402, ups=1.12, wpb=495856, bsz=16750.3, num_updates=10000, lr=0.000632456, gnorm=0.219, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9208 epoch 006: 1570 / 1689 loss=3.752, nll_loss=2.223, ppl=4.67, wps=553402, ups=1.12, wpb=495856, bsz=16750.3, num_updates=10000, lr=0.000632456, gnorm=0.219, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9208 epoch 006: 1570 / 1689 loss=3.752, nll_loss=2.223, ppl=4.67, wps=553402, ups=1.12, wpb=495856, bsz=16750.3, num_updates=10000, lr=0.000632456, gnorm=0.219, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9208 epoch 006: 1570 / 1689 loss=3.752, nll_loss=2.223, ppl=4.67, wps=553402, ups=1.12, wpb=495856, bsz=16750.3, num_updates=10000, lr=0.000632456, gnorm=0.219, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9208 epoch 006: 1570 / 1689 loss=3.752, nll_loss=2.223, ppl=4.67, wps=553402, ups=1.12, wpb=495856, bsz=16750.3, num_updates=10000, lr=0.000632456, gnorm=0.219, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9208 begin validation on "valid" subset epoch 006 | valid on 'valid' subset | loss 3.824 | nll_loss 2.282 | ppl 4.86 | wps 0 | wpb 44526 | bsz 2008 | num_updates 10000 | best_loss 3.824 epoch 006 | valid on 'valid' subset | loss 3.824 | nll_loss 2.282 | ppl 4.86 | wps 0 | wpb 44526 | bsz 2008 | num_updates 10000 | best_loss 3.824 epoch 006 | valid on 'valid' subset | loss 3.824 | nll_loss 2.282 | ppl 4.86 | wps 0 | wpb 44526 | bsz 2008 | num_updates 10000 | best_loss 3.824 epoch 006 | valid on 'valid' subset | loss 3.824 | nll_loss 2.282 | ppl 4.86 | wps 0 | wpb 44526 | bsz 2008 | num_updates 10000 | best_loss 3.824 epoch 006 | valid on 'valid' subset | loss 3.824 | nll_loss 2.282 | ppl 4.86 | wps 0 | wpb 44526 | bsz 2008 | num_updates 10000 | best_loss 3.824 epoch 006 | valid on 'valid' subset | loss 3.824 | nll_loss 2.282 | ppl 4.86 | wps 0 | wpb 44526 | bsz 2008 | num_updates 10000 | best_loss 3.824 epoch 006: 1670 / 1689 loss=3.756, nll_loss=2.228, ppl=4.68, wps=385738, ups=0.78, wpb=495675, bsz=16481.7, num_updates=10100, lr=0.000629317, gnorm=0.221, clip=0, loss_scale=8, train_wall=86, gb_free=21.9, wall=9336 epoch 006: 1670 / 1689 loss=3.756, nll_loss=2.228, ppl=4.68, wps=385738, ups=0.78, wpb=495675, bsz=16481.7, num_updates=10100, lr=0.000629317, gnorm=0.221, clip=0, loss_scale=8, train_wall=86, gb_free=21.9, wall=9336 epoch 006: 1670 / 1689 loss=3.756, nll_loss=2.228, ppl=4.68, wps=385738, ups=0.78, wpb=495675, bsz=16481.7, num_updates=10100, lr=0.000629317, gnorm=0.221, clip=0, loss_scale=8, train_wall=86, gb_free=21.9, wall=9336 epoch 006: 1670 / 1689 loss=3.756, nll_loss=2.228, ppl=4.68, wps=385738, ups=0.78, wpb=495675, bsz=16481.7, num_updates=10100, lr=0.000629317, gnorm=0.221, clip=0, loss_scale=8, train_wall=86, gb_free=21.9, wall=9336 epoch 006: 1670 / 1689 loss=3.756, nll_loss=2.228, ppl=4.68, wps=385738, ups=0.78, wpb=495675, bsz=16481.7, num_updates=10100, lr=0.000629317, gnorm=0.221, clip=0, loss_scale=8, train_wall=86, gb_free=21.9, wall=9336 epoch 006: 1670 / 1689 loss=3.756, nll_loss=2.228, ppl=4.68, wps=385738, ups=0.78, wpb=495675, bsz=16481.7, num_updates=10100, lr=0.000629317, gnorm=0.221, clip=0, loss_scale=8, train_wall=86, gb_free=21.9, wall=9336 end of epoch 6 (average epoch stats below) epoch 006 | loss 3.762 | nll_loss 2.233 | ppl 4.7 | wps 530658 | ups 1.07 | wpb 495122 | bsz 16507.6 | num_updates 10119 | lr 0.000628726 | gnorm 0.229 | clip 0 | loss_scale 8 | train_wall 1492 | gb_free 23 | wall 9352 epoch 006 | loss 3.762 | nll_loss 2.233 | ppl 4.7 | wps 530658 | ups 1.07 | wpb 495122 | bsz 16507.6 | num_updates 10119 | lr 0.000628726 | gnorm 0.229 | clip 0 | loss_scale 8 | train_wall 1492 | gb_free 23 | wall 9352 epoch 006 | loss 3.762 | nll_loss 2.233 | ppl 4.7 | wps 530658 | ups 1.07 | wpb 495122 | bsz 16507.6 | num_updates 10119 | lr 0.000628726 | gnorm 0.229 | clip 0 | loss_scale 8 | train_wall 1492 | gb_free 23 | wall 9352 epoch 006 | loss 3.762 | nll_loss 2.233 | ppl 4.7 | wps 530658 | ups 1.07 | wpb 495122 | bsz 16507.6 | num_updates 10119 | lr 0.000628726 | gnorm 0.229 | clip 0 | loss_scale 8 | train_wall 1492 | gb_free 23 | wall 9352 epoch 006 | loss 3.762 | nll_loss 2.233 | ppl 4.7 | wps 530658 | ups 1.07 | wpb 495122 | bsz 16507.6 | num_updates 10119 | lr 0.000628726 | gnorm 0.229 | clip 0 | loss_scale 8 | train_wall 1492 | gb_free 23 | wall 9352 epoch 006 | loss 3.762 | nll_loss 2.233 | ppl 4.7 | wps 530658 | ups 1.07 | wpb 495122 | bsz 16507.6 | num_updates 10119 | lr 0.000628726 | gnorm 0.229 | clip 0 | loss_scale 8 | train_wall 1492 | gb_free 23 | wall 9352 Start iterating over samples epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 82 / 1689 loss=3.723, nll_loss=2.19, ppl=4.56, wps=550716, ups=1.12, wpb=490530, bsz=16307.8, num_updates=10200, lr=0.000626224, gnorm=0.23, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=9425 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 182 / 1689 loss=3.709, nll_loss=2.174, ppl=4.51, wps=558254, ups=1.13, wpb=495741, bsz=16715.4, num_updates=10300, lr=0.000623177, gnorm=0.211, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=9514 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 282 / 1689 loss=3.718, nll_loss=2.184, ppl=4.54, wps=558886, ups=1.13, wpb=495341, bsz=16592.9, num_updates=10400, lr=0.000620174, gnorm=0.223, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=9603 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 383 / 1689 loss=3.724, nll_loss=2.191, ppl=4.57, wps=552878, ups=1.12, wpb=495247, bsz=16141.5, num_updates=10500, lr=0.000617213, gnorm=0.223, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=9692 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 483 / 1689 loss=3.727, nll_loss=2.195, ppl=4.58, wps=557018, ups=1.12, wpb=495202, bsz=16557.2, num_updates=10600, lr=0.000614295, gnorm=0.22, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=9781 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 583 / 1689 loss=3.721, nll_loss=2.189, ppl=4.56, wps=555234, ups=1.12, wpb=496100, bsz=16461.3, num_updates=10700, lr=0.000611418, gnorm=0.222, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=9871 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 683 / 1689 loss=3.726, nll_loss=2.193, ppl=4.57, wps=554681, ups=1.12, wpb=494727, bsz=16323.8, num_updates=10800, lr=0.000608581, gnorm=0.232, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=9960 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 783 / 1689 loss=3.724, nll_loss=2.192, ppl=4.57, wps=549939, ups=1.11, wpb=494245, bsz=16231.8, num_updates=10900, lr=0.000605783, gnorm=0.211, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=10050 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 epoch 007: 883 / 1689 loss=3.723, nll_loss=2.191, ppl=4.56, wps=554590, ups=1.12, wpb=494758, bsz=16427.9, num_updates=11000, lr=0.000603023, gnorm=0.222, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=10139 begin validation on "valid" subset epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007 | valid on 'valid' subset | loss 3.802 | nll_loss 2.267 | ppl 4.81 | wps 0 | wpb 44526 | bsz 2008 | num_updates 11000 | best_loss 3.802 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 983 / 1689 loss=3.722, nll_loss=2.19, ppl=4.56, wps=460510, ups=0.93, wpb=496550, bsz=16253.8, num_updates=11100, lr=0.0006003, gnorm=0.213, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=10247 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1083 / 1689 loss=3.722, nll_loss=2.191, ppl=4.57, wps=550564, ups=1.11, wpb=496079, bsz=16901.5, num_updates=11200, lr=0.000597614, gnorm=0.213, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=10337 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1183 / 1689 loss=3.723, nll_loss=2.191, ppl=4.57, wps=557367, ups=1.12, wpb=495919, bsz=16675, num_updates=11300, lr=0.000594964, gnorm=0.214, clip=0, loss_scale=4, train_wall=87, gb_free=21.8, wall=10426 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1283 / 1689 loss=3.722, nll_loss=2.191, ppl=4.56, wps=553366, ups=1.12, wpb=495412, bsz=16498.5, num_updates=11400, lr=0.000592349, gnorm=0.21, clip=0, loss_scale=4, train_wall=88, gb_free=21.5, wall=10515 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1384 / 1689 loss=3.727, nll_loss=2.196, ppl=4.58, wps=546754, ups=1.1, wpb=494965, bsz=16322.3, num_updates=11500, lr=0.000589768, gnorm=0.22, clip=0, loss_scale=4, train_wall=89, gb_free=22, wall=10606 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1484 / 1689 loss=3.714, nll_loss=2.182, ppl=4.54, wps=547759, ups=1.1, wpb=496070, bsz=16661.9, num_updates=11600, lr=0.00058722, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=21.6, wall=10696 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1584 / 1689 loss=3.713, nll_loss=2.18, ppl=4.53, wps=546419, ups=1.1, wpb=495895, bsz=16985.8, num_updates=11700, lr=0.000584705, gnorm=0.205, clip=0, loss_scale=4, train_wall=89, gb_free=22.7, wall=10787 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 epoch 007: 1684 / 1689 loss=3.712, nll_loss=2.18, ppl=4.53, wps=547229, ups=1.11, wpb=494822, bsz=16502.3, num_updates=11800, lr=0.000582223, gnorm=0.209, clip=0, loss_scale=4, train_wall=89, gb_free=22.4, wall=10878 end of epoch 7 (average epoch stats below) epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 epoch 007 | loss 3.72 | nll_loss 2.188 | ppl 4.56 | wps 545928 | ups 1.1 | wpb 495107 | bsz 16504.4 | num_updates 11805 | lr 0.000582099 | gnorm 0.217 | clip 0 | loss_scale 4 | train_wall 1488 | gb_free 23 | wall 10881 Start iterating over samples epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 95 / 1689 loss=3.675, nll_loss=2.137, ppl=4.4, wps=545292, ups=1.11, wpb=491425, bsz=16580.3, num_updates=11900, lr=0.000579771, gnorm=0.221, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=10968 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 epoch 008: 195 / 1689 loss=3.679, nll_loss=2.141, ppl=4.41, wps=552675, ups=1.12, wpb=494513, bsz=16693.9, num_updates=12000, lr=0.00057735, gnorm=0.219, clip=0, loss_scale=8, train_wall=88, gb_free=22.1, wall=11057 begin validation on "valid" subset epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008 | valid on 'valid' subset | loss 3.818 | nll_loss 2.274 | ppl 4.84 | wps 0 | wpb 44526 | bsz 2008 | num_updates 12000 | best_loss 3.802 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 297 / 1689 loss=3.682, nll_loss=2.145, ppl=4.42, wps=479462, ups=0.97, wpb=495528, bsz=16533.1, num_updates=12100, lr=0.00057496, gnorm=0.213, clip=0, loss_scale=2, train_wall=90, gb_free=21.1, wall=11160 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 398 / 1689 loss=3.681, nll_loss=2.144, ppl=4.42, wps=545657, ups=1.1, wpb=495179, bsz=16670.2, num_updates=12200, lr=0.000572598, gnorm=0.211, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=11251 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 498 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554201, ups=1.12, wpb=493366, bsz=15939, num_updates=12300, lr=0.000570266, gnorm=0.206, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=11340 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 598 / 1689 loss=3.697, nll_loss=2.162, ppl=4.47, wps=555898, ups=1.12, wpb=495106, bsz=16509, num_updates=12400, lr=0.000567962, gnorm=0.204, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=11429 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 698 / 1689 loss=3.688, nll_loss=2.152, ppl=4.45, wps=549460, ups=1.11, wpb=495682, bsz=16531.5, num_updates=12500, lr=0.000565685, gnorm=0.224, clip=0, loss_scale=1, train_wall=89, gb_free=20.7, wall=11520 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 798 / 1689 loss=3.692, nll_loss=2.157, ppl=4.46, wps=554708, ups=1.12, wpb=495975, bsz=16442.3, num_updates=12600, lr=0.000563436, gnorm=0.205, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=11609 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 898 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=554036, ups=1.12, wpb=496040, bsz=16665, num_updates=12700, lr=0.000561214, gnorm=0.212, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=11698 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 998 / 1689 loss=3.695, nll_loss=2.161, ppl=4.47, wps=544163, ups=1.1, wpb=494198, bsz=16455.6, num_updates=12800, lr=0.000559017, gnorm=0.207, clip=0, loss_scale=2, train_wall=90, gb_free=22.3, wall=11789 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1098 / 1689 loss=3.685, nll_loss=2.149, ppl=4.44, wps=547986, ups=1.11, wpb=494969, bsz=16398.1, num_updates=12900, lr=0.000556846, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=11880 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 epoch 008: 1198 / 1689 loss=3.686, nll_loss=2.151, ppl=4.44, wps=558750, ups=1.12, wpb=497424, bsz=16192.6, num_updates=13000, lr=0.0005547, gnorm=0.207, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=11969 begin validation on "valid" subset epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008 | valid on 'valid' subset | loss 3.788 | nll_loss 2.25 | ppl 4.76 | wps 0 | wpb 44526 | bsz 2008 | num_updates 13000 | best_loss 3.788 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1298 / 1689 loss=3.685, nll_loss=2.15, ppl=4.44, wps=394860, ups=0.8, wpb=495453, bsz=16751, num_updates=13100, lr=0.000552579, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=12094 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1399 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=553827, ups=1.12, wpb=496602, bsz=16590.8, num_updates=13200, lr=0.000550482, gnorm=0.224, clip=1, loss_scale=1, train_wall=89, gb_free=21.8, wall=12184 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1499 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=549292, ups=1.11, wpb=495143, bsz=16468.4, num_updates=13300, lr=0.000548408, gnorm=0.191, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12274 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 epoch 008: 1599 / 1689 loss=3.693, nll_loss=2.159, ppl=4.47, wps=551350, ups=1.11, wpb=494698, bsz=16570.9, num_updates=13400, lr=0.000546358, gnorm=0.213, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=12364 end of epoch 8 (average epoch stats below) epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 epoch 008 | loss 3.688 | nll_loss 2.153 | ppl 4.45 | wps 533908 | ups 1.08 | wpb 495134 | bsz 16509.9 | num_updates 13490 | lr 0.000544533 | gnorm 0.209 | clip 0.1 | loss_scale 1 | train_wall 1493 | gb_free 22.7 | wall 12444 Start iterating over samples epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 10 / 1689 loss=3.689, nll_loss=2.154, ppl=4.45, wps=544791, ups=1.11, wpb=491756, bsz=16523.8, num_updates=13500, lr=0.000544331, gnorm=0.21, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12454 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 110 / 1689 loss=3.649, nll_loss=2.108, ppl=4.31, wps=554920, ups=1.12, wpb=495785, bsz=16394.6, num_updates=13600, lr=0.000542326, gnorm=0.201, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=12543 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 210 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=548296, ups=1.11, wpb=493508, bsz=16709.4, num_updates=13700, lr=0.000540343, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=12633 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 310 / 1689 loss=3.651, nll_loss=2.111, ppl=4.32, wps=549691, ups=1.11, wpb=497118, bsz=16555.1, num_updates=13800, lr=0.000538382, gnorm=0.195, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=12724 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 410 / 1689 loss=3.66, nll_loss=2.121, ppl=4.35, wps=550690, ups=1.11, wpb=495117, bsz=16251.6, num_updates=13900, lr=0.000536442, gnorm=0.198, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=12814 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 epoch 009: 511 / 1689 loss=3.658, nll_loss=2.119, ppl=4.34, wps=541611, ups=1.09, wpb=495066, bsz=16518.1, num_updates=14000, lr=0.000534522, gnorm=0.202, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=12905 begin validation on "valid" subset epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009 | valid on 'valid' subset | loss 3.783 | nll_loss 2.242 | ppl 4.73 | wps 0 | wpb 44526 | bsz 2008 | num_updates 14000 | best_loss 3.783 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 611 / 1689 loss=3.659, nll_loss=2.121, ppl=4.35, wps=456129, ups=0.92, wpb=495312, bsz=16384.8, num_updates=14100, lr=0.000532624, gnorm=0.202, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13014 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 711 / 1689 loss=3.658, nll_loss=2.12, ppl=4.35, wps=553522, ups=1.12, wpb=495283, bsz=16520.8, num_updates=14200, lr=0.000530745, gnorm=0.198, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=13103 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 811 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=553908, ups=1.12, wpb=494861, bsz=16637.5, num_updates=14300, lr=0.000528886, gnorm=0.2, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=13192 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 911 / 1689 loss=3.669, nll_loss=2.132, ppl=4.38, wps=553173, ups=1.12, wpb=495369, bsz=16696.6, num_updates=14400, lr=0.000527046, gnorm=0.208, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=13282 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1011 / 1689 loss=3.665, nll_loss=2.127, ppl=4.37, wps=557223, ups=1.13, wpb=495114, bsz=16360.1, num_updates=14500, lr=0.000525226, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=13371 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1111 / 1689 loss=3.67, nll_loss=2.134, ppl=4.39, wps=560256, ups=1.13, wpb=494900, bsz=16756.2, num_updates=14600, lr=0.000523424, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=13459 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1211 / 1689 loss=3.663, nll_loss=2.125, ppl=4.36, wps=558428, ups=1.13, wpb=495309, bsz=16216.9, num_updates=14700, lr=0.000521641, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=13548 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1311 / 1689 loss=3.664, nll_loss=2.127, ppl=4.37, wps=559122, ups=1.13, wpb=495766, bsz=16353.8, num_updates=14800, lr=0.000519875, gnorm=0.2, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=13637 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1411 / 1689 loss=3.662, nll_loss=2.125, ppl=4.36, wps=555948, ups=1.12, wpb=495312, bsz=16649.7, num_updates=14900, lr=0.000518128, gnorm=0.197, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=13726 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 epoch 009: 1511 / 1689 loss=3.671, nll_loss=2.135, ppl=4.39, wps=553648, ups=1.12, wpb=495654, bsz=16451.1, num_updates=15000, lr=0.000516398, gnorm=0.202, clip=0, loss_scale=4, train_wall=88, gb_free=21.9, wall=13815 begin validation on "valid" subset epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009 | valid on 'valid' subset | loss 3.768 | nll_loss 2.23 | ppl 4.69 | wps 0 | wpb 44526 | bsz 2008 | num_updates 15000 | best_loss 3.768 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 epoch 009: 1613 / 1689 loss=3.663, nll_loss=2.126, ppl=4.36, wps=352692, ups=0.71, wpb=495699, bsz=16665.5, num_updates=15100, lr=0.000514685, gnorm=0.202, clip=0, loss_scale=1, train_wall=114, gb_free=22.2, wall=13956 end of epoch 9 (average epoch stats below) epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 epoch 009 | loss 3.662 | nll_loss 2.124 | ppl 4.36 | wps 528742 | ups 1.07 | wpb 495126 | bsz 16505.5 | num_updates 15176 | lr 0.000513395 | gnorm 0.2 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 25.4 | wall 14023 Start iterating over samples epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 24 / 1689 loss=3.66, nll_loss=2.123, ppl=4.36, wps=552007, ups=1.12, wpb=491640, bsz=16581.4, num_updates=15200, lr=0.000512989, gnorm=0.192, clip=0, loss_scale=1, train_wall=87, gb_free=21.2, wall=14045 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 124 / 1689 loss=3.617, nll_loss=2.073, ppl=4.21, wps=549531, ups=1.11, wpb=495572, bsz=16445.6, num_updates=15300, lr=0.00051131, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=22.9, wall=14135 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 224 / 1689 loss=3.634, nll_loss=2.092, ppl=4.26, wps=546628, ups=1.1, wpb=495280, bsz=16744.1, num_updates=15400, lr=0.000509647, gnorm=0.197, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=14226 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 324 / 1689 loss=3.64, nll_loss=2.099, ppl=4.28, wps=547352, ups=1.1, wpb=495349, bsz=16496.1, num_updates=15500, lr=0.000508001, gnorm=0.195, clip=0, loss_scale=1, train_wall=89, gb_free=21.5, wall=14316 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 424 / 1689 loss=3.633, nll_loss=2.091, ppl=4.26, wps=545915, ups=1.1, wpb=495917, bsz=16609.7, num_updates=15600, lr=0.00050637, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=14407 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 524 / 1689 loss=3.639, nll_loss=2.098, ppl=4.28, wps=552881, ups=1.11, wpb=496666, bsz=16304.5, num_updates=15700, lr=0.000504754, gnorm=0.194, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=14497 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 624 / 1689 loss=3.63, nll_loss=2.089, ppl=4.25, wps=551755, ups=1.11, wpb=495407, bsz=16805.3, num_updates=15800, lr=0.000503155, gnorm=0.199, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=14587 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 724 / 1689 loss=3.641, nll_loss=2.101, ppl=4.29, wps=550479, ups=1.11, wpb=495419, bsz=16520.5, num_updates=15900, lr=0.00050157, gnorm=0.196, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=14677 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 epoch 010: 824 / 1689 loss=3.642, nll_loss=2.102, ppl=4.29, wps=550917, ups=1.11, wpb=495783, bsz=16543.2, num_updates=16000, lr=0.0005, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=14767 begin validation on "valid" subset epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010 | valid on 'valid' subset | loss 3.761 | nll_loss 2.216 | ppl 4.65 | wps 0 | wpb 44526 | bsz 2008 | num_updates 16000 | best_loss 3.761 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 924 / 1689 loss=3.641, nll_loss=2.102, ppl=4.29, wps=449418, ups=0.9, wpb=496619, bsz=16738.4, num_updates=16100, lr=0.000498445, gnorm=0.197, clip=0, loss_scale=4, train_wall=89, gb_free=21.4, wall=14877 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1024 / 1689 loss=3.648, nll_loss=2.109, ppl=4.31, wps=554004, ups=1.12, wpb=495302, bsz=16598.1, num_updates=16200, lr=0.000496904, gnorm=0.192, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=14966 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1124 / 1689 loss=3.647, nll_loss=2.108, ppl=4.31, wps=550084, ups=1.11, wpb=494794, bsz=16349.7, num_updates=16300, lr=0.000495377, gnorm=0.189, clip=0, loss_scale=4, train_wall=88, gb_free=20.6, wall=15056 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1224 / 1689 loss=3.644, nll_loss=2.105, ppl=4.3, wps=550325, ups=1.11, wpb=495790, bsz=16427, num_updates=16400, lr=0.000493865, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=21.6, wall=15147 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1324 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=553247, ups=1.12, wpb=494335, bsz=16563.4, num_updates=16500, lr=0.000492366, gnorm=0.193, clip=0, loss_scale=4, train_wall=88, gb_free=22.1, wall=15236 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1425 / 1689 loss=3.643, nll_loss=2.104, ppl=4.3, wps=546829, ups=1.1, wpb=495775, bsz=16605.5, num_updates=16600, lr=0.000490881, gnorm=0.197, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=15327 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1525 / 1689 loss=3.65, nll_loss=2.112, ppl=4.32, wps=550062, ups=1.11, wpb=493411, bsz=16423.4, num_updates=16700, lr=0.000489409, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=15416 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 epoch 010: 1625 / 1689 loss=3.64, nll_loss=2.101, ppl=4.29, wps=553286, ups=1.12, wpb=495772, bsz=16299.8, num_updates=16800, lr=0.00048795, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=15506 end of epoch 10 (average epoch stats below) epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 epoch 010 | loss 3.64 | nll_loss 2.1 | ppl 4.29 | wps 542888 | ups 1.1 | wpb 495119 | bsz 16506.4 | num_updates 16864 | lr 0.000487023 | gnorm 0.194 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.6 | wall 15562 Start iterating over samples epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 36 / 1689 loss=3.626, nll_loss=2.085, ppl=4.24, wps=548674, ups=1.12, wpb=490471, bsz=16053, num_updates=16900, lr=0.000486504, gnorm=0.193, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=15595 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 epoch 011: 136 / 1689 loss=3.606, nll_loss=2.061, ppl=4.17, wps=553279, ups=1.11, wpb=496346, bsz=16649, num_updates=17000, lr=0.000485071, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15685 begin validation on "valid" subset epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011 | valid on 'valid' subset | loss 3.771 | nll_loss 2.225 | ppl 4.68 | wps 0 | wpb 44526 | bsz 2008 | num_updates 17000 | best_loss 3.761 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 237 / 1689 loss=3.608, nll_loss=2.063, ppl=4.18, wps=474908, ups=0.96, wpb=495646, bsz=16530.2, num_updates=17100, lr=0.000483651, gnorm=0.194, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=15789 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 337 / 1689 loss=3.619, nll_loss=2.077, ppl=4.22, wps=553673, ups=1.12, wpb=496083, bsz=16943.4, num_updates=17200, lr=0.000482243, gnorm=0.205, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=15879 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 437 / 1689 loss=3.625, nll_loss=2.083, ppl=4.24, wps=552789, ups=1.12, wpb=493728, bsz=16388.2, num_updates=17300, lr=0.000480847, gnorm=0.19, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=15968 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 537 / 1689 loss=3.628, nll_loss=2.087, ppl=4.25, wps=551605, ups=1.12, wpb=493399, bsz=16565, num_updates=17400, lr=0.000479463, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=16058 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 637 / 1689 loss=3.621, nll_loss=2.078, ppl=4.22, wps=562145, ups=1.13, wpb=496685, bsz=16296.1, num_updates=17500, lr=0.000478091, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=16146 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 737 / 1689 loss=3.622, nll_loss=2.08, ppl=4.23, wps=554202, ups=1.12, wpb=494793, bsz=16516.4, num_updates=17600, lr=0.000476731, gnorm=0.199, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=16235 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 837 / 1689 loss=3.62, nll_loss=2.078, ppl=4.22, wps=554497, ups=1.12, wpb=495727, bsz=16608.1, num_updates=17700, lr=0.000475383, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16325 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 937 / 1689 loss=3.617, nll_loss=2.074, ppl=4.21, wps=550578, ups=1.11, wpb=497000, bsz=16488.7, num_updates=17800, lr=0.000474045, gnorm=0.183, clip=0, loss_scale=4, train_wall=89, gb_free=21.8, wall=16415 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1037 / 1689 loss=3.623, nll_loss=2.081, ppl=4.23, wps=545924, ups=1.11, wpb=493323, bsz=16808.4, num_updates=17900, lr=0.000472719, gnorm=0.184, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=16505 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 epoch 011: 1137 / 1689 loss=3.627, nll_loss=2.086, ppl=4.24, wps=553121, ups=1.11, wpb=496090, bsz=16556.6, num_updates=18000, lr=0.000471405, gnorm=0.2, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=16595 begin validation on "valid" subset epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011 | valid on 'valid' subset | loss 3.749 | nll_loss 2.205 | ppl 4.61 | wps 0 | wpb 44526 | bsz 2008 | num_updates 18000 | best_loss 3.749 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1238 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=358902, ups=0.72, wpb=495150, bsz=16708.6, num_updates=18100, lr=0.0004701, gnorm=0.191, clip=0, loss_scale=2, train_wall=114, gb_free=21.7, wall=16733 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1338 / 1689 loss=3.627, nll_loss=2.086, ppl=4.25, wps=557909, ups=1.12, wpb=495996, bsz=16402.4, num_updates=18200, lr=0.000468807, gnorm=0.185, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=16822 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1439 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=551146, ups=1.12, wpb=493661, bsz=16036.8, num_updates=18300, lr=0.000467525, gnorm=0.193, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=16912 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1539 / 1689 loss=3.629, nll_loss=2.089, ppl=4.25, wps=553124, ups=1.12, wpb=495016, bsz=16101.7, num_updates=18400, lr=0.000466252, gnorm=0.195, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=17001 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 epoch 011: 1639 / 1689 loss=3.624, nll_loss=2.083, ppl=4.24, wps=551559, ups=1.11, wpb=496725, bsz=16388.4, num_updates=18500, lr=0.000464991, gnorm=0.188, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=17091 end of epoch 11 (average epoch stats below) epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 epoch 011 | loss 3.621 | nll_loss 2.08 | ppl 4.23 | wps 530723 | ups 1.07 | wpb 495109 | bsz 16499.9 | num_updates 18550 | lr 0.000464363 | gnorm 0.191 | clip 0 | loss_scale 1 | train_wall 1512 | gb_free 22.9 | wall 17135 Start iterating over samples epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 50 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=540107, ups=1.1, wpb=491078, bsz=16369.8, num_updates=18600, lr=0.000463739, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=17182 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 150 / 1689 loss=3.586, nll_loss=2.039, ppl=4.11, wps=552632, ups=1.11, wpb=496160, bsz=16257.3, num_updates=18700, lr=0.000462497, gnorm=0.186, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=17272 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 250 / 1689 loss=3.596, nll_loss=2.051, ppl=4.14, wps=552046, ups=1.12, wpb=495005, bsz=16726.8, num_updates=18800, lr=0.000461266, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17361 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 350 / 1689 loss=3.6, nll_loss=2.055, ppl=4.16, wps=550465, ups=1.11, wpb=494284, bsz=16487.9, num_updates=18900, lr=0.000460044, gnorm=0.189, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=17451 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 epoch 012: 450 / 1689 loss=3.604, nll_loss=2.059, ppl=4.17, wps=554227, ups=1.12, wpb=494326, bsz=16398.1, num_updates=19000, lr=0.000458831, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=17540 begin validation on "valid" subset epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012 | valid on 'valid' subset | loss 3.759 | nll_loss 2.214 | ppl 4.64 | wps 0 | wpb 44526 | bsz 2008 | num_updates 19000 | best_loss 3.749 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 550 / 1689 loss=3.602, nll_loss=2.058, ppl=4.16, wps=490109, ups=0.99, wpb=495614, bsz=16455.9, num_updates=19100, lr=0.000457629, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=17642 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 650 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=557704, ups=1.12, wpb=497014, bsz=16727.1, num_updates=19200, lr=0.000456435, gnorm=0.186, clip=0, loss_scale=2, train_wall=88, gb_free=20.5, wall=17731 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 751 / 1689 loss=3.609, nll_loss=2.066, ppl=4.19, wps=550066, ups=1.11, wpb=496318, bsz=16470.7, num_updates=19300, lr=0.000455251, gnorm=0.198, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=17821 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 851 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=550922, ups=1.11, wpb=495583, bsz=16763.2, num_updates=19400, lr=0.000454077, gnorm=0.182, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=17911 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 951 / 1689 loss=3.611, nll_loss=2.069, ppl=4.2, wps=553903, ups=1.12, wpb=495031, bsz=16629.3, num_updates=19500, lr=0.000452911, gnorm=0.187, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=18000 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1051 / 1689 loss=3.608, nll_loss=2.065, ppl=4.18, wps=557457, ups=1.12, wpb=495558, bsz=16041, num_updates=19600, lr=0.000451754, gnorm=0.183, clip=0, loss_scale=1, train_wall=87, gb_free=22, wall=18089 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1151 / 1689 loss=3.609, nll_loss=2.067, ppl=4.19, wps=553441, ups=1.12, wpb=495984, bsz=16475.8, num_updates=19700, lr=0.000450606, gnorm=0.178, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=18179 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1251 / 1689 loss=3.61, nll_loss=2.067, ppl=4.19, wps=551736, ups=1.12, wpb=494652, bsz=16663.2, num_updates=19800, lr=0.000449467, gnorm=0.188, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=18268 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1351 / 1689 loss=3.613, nll_loss=2.072, ppl=4.2, wps=554906, ups=1.12, wpb=495022, bsz=16952, num_updates=19900, lr=0.000448336, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=18358 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 epoch 012: 1451 / 1689 loss=3.614, nll_loss=2.072, ppl=4.21, wps=553315, ups=1.12, wpb=494607, bsz=16311.4, num_updates=20000, lr=0.000447214, gnorm=0.183, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=18447 begin validation on "valid" subset epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012 | valid on 'valid' subset | loss 3.736 | nll_loss 2.191 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 20000 | best_loss 3.736 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1551 / 1689 loss=3.61, nll_loss=2.068, ppl=4.19, wps=283330, ups=0.57, wpb=495430, bsz=16512.3, num_updates=20100, lr=0.0004461, gnorm=0.178, clip=0, loss_scale=2, train_wall=96, gb_free=22.1, wall=18622 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 epoch 012: 1651 / 1689 loss=3.613, nll_loss=2.071, ppl=4.2, wps=559762, ups=1.13, wpb=496099, bsz=16614.7, num_updates=20200, lr=0.000444994, gnorm=0.187, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=18711 end of epoch 12 (average epoch stats below) epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 epoch 012 | loss 3.605 | nll_loss 2.062 | ppl 4.17 | wps 519470 | ups 1.05 | wpb 495111 | bsz 16506.5 | num_updates 20238 | lr 0.000444576 | gnorm 0.185 | clip 0 | loss_scale 2 | train_wall 1494 | gb_free 23 | wall 18744 Start iterating over samples epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 62 / 1689 loss=3.587, nll_loss=2.04, ppl=4.11, wps=550998, ups=1.12, wpb=491720, bsz=16008.6, num_updates=20300, lr=0.000443897, gnorm=0.196, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=18800 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 163 / 1689 loss=3.574, nll_loss=2.026, ppl=4.07, wps=554027, ups=1.12, wpb=495168, bsz=16348.5, num_updates=20400, lr=0.000442807, gnorm=0.189, clip=0, loss_scale=2, train_wall=89, gb_free=21.3, wall=18889 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 263 / 1689 loss=3.577, nll_loss=2.029, ppl=4.08, wps=557484, ups=1.13, wpb=495011, bsz=16546.6, num_updates=20500, lr=0.000441726, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=18978 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 363 / 1689 loss=3.584, nll_loss=2.038, ppl=4.11, wps=551391, ups=1.12, wpb=494085, bsz=16818.6, num_updates=20600, lr=0.000440653, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19068 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 463 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=556218, ups=1.13, wpb=494204, bsz=16513, num_updates=20700, lr=0.000439587, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=19156 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 563 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=557538, ups=1.13, wpb=495508, bsz=16069.2, num_updates=20800, lr=0.000438529, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=19245 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 663 / 1689 loss=3.591, nll_loss=2.046, ppl=4.13, wps=553142, ups=1.11, wpb=496344, bsz=17003.5, num_updates=20900, lr=0.000437479, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=19335 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 epoch 013: 763 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=556433, ups=1.12, wpb=495458, bsz=16534.2, num_updates=21000, lr=0.000436436, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=21.1, wall=19424 begin validation on "valid" subset epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013 | valid on 'valid' subset | loss 3.738 | nll_loss 2.193 | ppl 4.57 | wps 0 | wpb 44526 | bsz 2008 | num_updates 21000 | best_loss 3.736 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 864 / 1689 loss=3.594, nll_loss=2.05, ppl=4.14, wps=377741, ups=0.76, wpb=495167, bsz=16703, num_updates=21100, lr=0.0004354, gnorm=0.19, clip=0, loss_scale=2, train_wall=115, gb_free=22.1, wall=19555 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 964 / 1689 loss=3.586, nll_loss=2.041, ppl=4.11, wps=551242, ups=1.11, wpb=496422, bsz=16487.1, num_updates=21200, lr=0.000434372, gnorm=0.178, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=19645 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1064 / 1689 loss=3.596, nll_loss=2.052, ppl=4.15, wps=550725, ups=1.11, wpb=495148, bsz=16525.8, num_updates=21300, lr=0.000433351, gnorm=0.202, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=19735 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1164 / 1689 loss=3.601, nll_loss=2.057, ppl=4.16, wps=556305, ups=1.13, wpb=494144, bsz=16421, num_updates=21400, lr=0.000432338, gnorm=0.188, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=19824 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1264 / 1689 loss=3.6, nll_loss=2.056, ppl=4.16, wps=557598, ups=1.13, wpb=495533, bsz=16305.5, num_updates=21500, lr=0.000431331, gnorm=0.183, clip=0, loss_scale=2, train_wall=87, gb_free=22, wall=19913 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1364 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553917, ups=1.12, wpb=495370, bsz=16778.2, num_updates=21600, lr=0.000430331, gnorm=0.176, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20002 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1464 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=555510, ups=1.12, wpb=495688, bsz=16679.6, num_updates=21700, lr=0.000429339, gnorm=0.18, clip=0, loss_scale=4, train_wall=87, gb_free=21.3, wall=20091 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1564 / 1689 loss=3.604, nll_loss=2.061, ppl=4.17, wps=558478, ups=1.13, wpb=495272, bsz=16524.9, num_updates=21800, lr=0.000428353, gnorm=0.186, clip=0, loss_scale=4, train_wall=87, gb_free=22.2, wall=20180 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 epoch 013: 1664 / 1689 loss=3.597, nll_loss=2.054, ppl=4.15, wps=553006, ups=1.12, wpb=495139, bsz=16281.7, num_updates=21900, lr=0.000427374, gnorm=0.187, clip=0, loss_scale=4, train_wall=88, gb_free=22, wall=20270 end of epoch 13 (average epoch stats below) epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 epoch 013 | loss 3.591 | nll_loss 2.046 | ppl 4.13 | wps 539822 | ups 1.09 | wpb 495104 | bsz 16507 | num_updates 21925 | lr 0.00042713 | gnorm 0.184 | clip 0 | loss_scale 4 | train_wall 1509 | gb_free 23.2 | wall 20291 Start iterating over samples epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 epoch 014: 75 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=547700, ups=1.11, wpb=492460, bsz=16325, num_updates=22000, lr=0.000426401, gnorm=0.184, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=20360 begin validation on "valid" subset epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014 | valid on 'valid' subset | loss 3.732 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 22000 | best_loss 3.732 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 176 / 1689 loss=3.558, nll_loss=2.008, ppl=4.02, wps=458146, ups=0.92, wpb=497717, bsz=16540.4, num_updates=22100, lr=0.000425436, gnorm=0.178, clip=0, loss_scale=4, train_wall=89, gb_free=22.8, wall=20468 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 276 / 1689 loss=3.567, nll_loss=2.018, ppl=4.05, wps=557444, ups=1.12, wpb=496523, bsz=16309.9, num_updates=22200, lr=0.000424476, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.3, wall=20557 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 376 / 1689 loss=3.571, nll_loss=2.023, ppl=4.06, wps=553998, ups=1.12, wpb=496083, bsz=16771, num_updates=22300, lr=0.000423524, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=21.7, wall=20647 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 476 / 1689 loss=3.575, nll_loss=2.028, ppl=4.08, wps=551930, ups=1.11, wpb=495068, bsz=16521.9, num_updates=22400, lr=0.000422577, gnorm=0.184, clip=0, loss_scale=4, train_wall=88, gb_free=22.2, wall=20737 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 577 / 1689 loss=3.578, nll_loss=2.031, ppl=4.09, wps=545117, ups=1.1, wpb=495260, bsz=16518.9, num_updates=22500, lr=0.000421637, gnorm=0.188, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=20827 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 677 / 1689 loss=3.577, nll_loss=2.031, ppl=4.09, wps=548696, ups=1.11, wpb=493978, bsz=16309.4, num_updates=22600, lr=0.000420703, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=20917 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 777 / 1689 loss=3.575, nll_loss=2.029, ppl=4.08, wps=546728, ups=1.1, wpb=495129, bsz=16454.4, num_updates=22700, lr=0.000419775, gnorm=0.187, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=21008 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 877 / 1689 loss=3.579, nll_loss=2.033, ppl=4.09, wps=548804, ups=1.11, wpb=495387, bsz=16644.4, num_updates=22800, lr=0.000418854, gnorm=0.183, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=21098 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 977 / 1689 loss=3.582, nll_loss=2.037, ppl=4.1, wps=549416, ups=1.11, wpb=494594, bsz=16736.5, num_updates=22900, lr=0.000417938, gnorm=0.184, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21188 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 epoch 014: 1078 / 1689 loss=3.584, nll_loss=2.039, ppl=4.11, wps=544492, ups=1.1, wpb=494715, bsz=16477.8, num_updates=23000, lr=0.000417029, gnorm=0.186, clip=0, loss_scale=2, train_wall=89, gb_free=22.3, wall=21279 begin validation on "valid" subset epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014 | valid on 'valid' subset | loss 3.728 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 23000 | best_loss 3.728 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1178 / 1689 loss=3.583, nll_loss=2.038, ppl=4.11, wps=454966, ups=0.92, wpb=494043, bsz=16505, num_updates=23100, lr=0.000416125, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21388 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1278 / 1689 loss=3.585, nll_loss=2.04, ppl=4.11, wps=554812, ups=1.12, wpb=495999, bsz=16302.4, num_updates=23200, lr=0.000415227, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=21477 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1378 / 1689 loss=3.588, nll_loss=2.044, ppl=4.12, wps=555131, ups=1.12, wpb=495455, bsz=16321.4, num_updates=23300, lr=0.000414335, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=21566 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1478 / 1689 loss=3.588, nll_loss=2.043, ppl=4.12, wps=552979, ups=1.12, wpb=495081, bsz=16702.6, num_updates=23400, lr=0.000413449, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=21656 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1578 / 1689 loss=3.577, nll_loss=2.032, ppl=4.09, wps=554953, ups=1.12, wpb=497036, bsz=16678.2, num_updates=23500, lr=0.000412568, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=21.8, wall=21746 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 epoch 014: 1679 / 1689 loss=3.593, nll_loss=2.049, ppl=4.14, wps=549314, ups=1.11, wpb=493476, bsz=16451.1, num_updates=23600, lr=0.000411693, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=21835 end of epoch 14 (average epoch stats below) epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 epoch 014 | loss 3.578 | nll_loss 2.032 | ppl 4.09 | wps 537444 | ups 1.09 | wpb 495139 | bsz 16505.8 | num_updates 23610 | lr 0.000411606 | gnorm 0.181 | clip 0 | loss_scale 2 | train_wall 1487 | gb_free 23.9 | wall 21844 Start iterating over samples epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 90 / 1689 loss=3.549, nll_loss=1.999, ppl=4, wps=550346, ups=1.12, wpb=490749, bsz=16211.8, num_updates=23700, lr=0.000410824, gnorm=0.191, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=21925 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 190 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=553722, ups=1.12, wpb=494314, bsz=16473.8, num_updates=23800, lr=0.00040996, gnorm=0.172, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=22014 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 290 / 1689 loss=3.556, nll_loss=2.006, ppl=4.02, wps=553638, ups=1.12, wpb=495579, bsz=16493.2, num_updates=23900, lr=0.000409101, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=22103 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 epoch 015: 390 / 1689 loss=3.56, nll_loss=2.011, ppl=4.03, wps=549563, ups=1.11, wpb=496227, bsz=16883, num_updates=24000, lr=0.000408248, gnorm=0.177, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22194 begin validation on "valid" subset epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.743 | nll_loss 2.201 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 24000 | best_loss 3.728 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 491 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=475572, ups=0.96, wpb=495627, bsz=16650.5, num_updates=24100, lr=0.0004074, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=22298 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 591 / 1689 loss=3.566, nll_loss=2.018, ppl=4.05, wps=553574, ups=1.12, wpb=494525, bsz=16389.8, num_updates=24200, lr=0.000406558, gnorm=0.182, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=22387 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 691 / 1689 loss=3.562, nll_loss=2.014, ppl=4.04, wps=552725, ups=1.12, wpb=495044, bsz=16668.6, num_updates=24300, lr=0.00040572, gnorm=0.181, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22477 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 791 / 1689 loss=3.569, nll_loss=2.021, ppl=4.06, wps=556933, ups=1.12, wpb=495111, bsz=16245.4, num_updates=24400, lr=0.000404888, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=22566 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 891 / 1689 loss=3.571, nll_loss=2.024, ppl=4.07, wps=562902, ups=1.14, wpb=494308, bsz=16269.2, num_updates=24500, lr=0.000404061, gnorm=0.176, clip=0, loss_scale=2, train_wall=87, gb_free=22.6, wall=22653 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 991 / 1689 loss=3.574, nll_loss=2.027, ppl=4.08, wps=566900, ups=1.14, wpb=497757, bsz=16263.1, num_updates=24600, lr=0.000403239, gnorm=0.176, clip=0, loss_scale=4, train_wall=87, gb_free=22.5, wall=22741 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1092 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=551637, ups=1.11, wpb=495462, bsz=16761.1, num_updates=24700, lr=0.000402422, gnorm=0.182, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=22831 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1192 / 1689 loss=3.582, nll_loss=2.036, ppl=4.1, wps=557127, ups=1.13, wpb=493698, bsz=16253, num_updates=24800, lr=0.00040161, gnorm=0.192, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=22920 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1292 / 1689 loss=3.567, nll_loss=2.02, ppl=4.06, wps=552183, ups=1.11, wpb=495261, bsz=16927.4, num_updates=24900, lr=0.000400802, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=22, wall=23009 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 epoch 015: 1392 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=557262, ups=1.12, wpb=497130, bsz=16537, num_updates=25000, lr=0.0004, gnorm=0.195, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=23099 begin validation on "valid" subset epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015 | valid on 'valid' subset | loss 3.74 | nll_loss 2.202 | ppl 4.6 | wps 0 | wpb 44526 | bsz 2008 | num_updates 25000 | best_loss 3.728 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1492 / 1689 loss=3.573, nll_loss=2.026, ppl=4.07, wps=483083, ups=0.98, wpb=494581, bsz=16878.7, num_updates=25100, lr=0.000399202, gnorm=0.181, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=23201 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 epoch 015: 1592 / 1689 loss=3.58, nll_loss=2.035, ppl=4.1, wps=556058, ups=1.12, wpb=496198, bsz=16552.5, num_updates=25200, lr=0.00039841, gnorm=0.185, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23290 end of epoch 15 (average epoch stats below) epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 epoch 015 | loss 3.567 | nll_loss 2.019 | ppl 4.05 | wps 544702 | ups 1.1 | wpb 495124 | bsz 16504.3 | num_updates 25296 | lr 0.000397653 | gnorm 0.18 | clip 0 | loss_scale 2 | train_wall 1483 | gb_free 24.3 | wall 23376 Start iterating over samples epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 4 / 1689 loss=3.58, nll_loss=2.034, ppl=4.1, wps=545513, ups=1.11, wpb=491682, bsz=15992.6, num_updates=25300, lr=0.000397621, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=23380 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 104 / 1689 loss=3.542, nll_loss=1.991, ppl=3.98, wps=560190, ups=1.13, wpb=495104, bsz=16343.6, num_updates=25400, lr=0.000396838, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=23469 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 204 / 1689 loss=3.541, nll_loss=1.99, ppl=3.97, wps=552787, ups=1.11, wpb=496631, bsz=16633.8, num_updates=25500, lr=0.000396059, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=23559 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 304 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=552720, ups=1.12, wpb=493421, bsz=16201, num_updates=25600, lr=0.000395285, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=23648 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 404 / 1689 loss=3.55, nll_loss=2, ppl=4, wps=553803, ups=1.12, wpb=495102, bsz=16290.6, num_updates=25700, lr=0.000394515, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=23737 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 504 / 1689 loss=3.555, nll_loss=2.006, ppl=4.02, wps=552464, ups=1.11, wpb=495879, bsz=16156.6, num_updates=25800, lr=0.00039375, gnorm=0.182, clip=0, loss_scale=4, train_wall=88, gb_free=22.5, wall=23827 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 604 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=550629, ups=1.11, wpb=495019, bsz=16595.9, num_updates=25900, lr=0.000392989, gnorm=0.175, clip=0, loss_scale=4, train_wall=88, gb_free=22.7, wall=23917 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 epoch 016: 705 / 1689 loss=3.556, nll_loss=2.007, ppl=4.02, wps=548475, ups=1.11, wpb=494208, bsz=16616.4, num_updates=26000, lr=0.000392232, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.7, wall=24007 begin validation on "valid" subset epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016 | valid on 'valid' subset | loss 3.72 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 26000 | best_loss 3.72 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 805 / 1689 loss=3.559, nll_loss=2.01, ppl=4.03, wps=389978, ups=0.79, wpb=494015, bsz=16598.2, num_updates=26100, lr=0.00039148, gnorm=0.174, clip=0, loss_scale=2, train_wall=86, gb_free=22.7, wall=24134 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 905 / 1689 loss=3.56, nll_loss=2.012, ppl=4.03, wps=558467, ups=1.13, wpb=495969, bsz=16631, num_updates=26200, lr=0.000390732, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=24223 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1005 / 1689 loss=3.558, nll_loss=2.01, ppl=4.03, wps=564522, ups=1.14, wpb=495712, bsz=16320, num_updates=26300, lr=0.000389989, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=24310 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1105 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=558216, ups=1.12, wpb=496582, bsz=16570.6, num_updates=26400, lr=0.000389249, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=24399 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1206 / 1689 loss=3.563, nll_loss=2.016, ppl=4.04, wps=552398, ups=1.12, wpb=495181, bsz=17152.9, num_updates=26500, lr=0.000388514, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=24489 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1306 / 1689 loss=3.562, nll_loss=2.015, ppl=4.04, wps=550370, ups=1.11, wpb=496270, bsz=16692.6, num_updates=26600, lr=0.000387783, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.2, wall=24579 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1406 / 1689 loss=3.559, nll_loss=2.012, ppl=4.03, wps=553352, ups=1.12, wpb=495160, bsz=16474, num_updates=26700, lr=0.000387056, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=24669 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1506 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=550756, ups=1.11, wpb=495401, bsz=16469, num_updates=26800, lr=0.000386334, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=24759 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 epoch 016: 1606 / 1689 loss=3.564, nll_loss=2.017, ppl=4.05, wps=553549, ups=1.11, wpb=496514, bsz=16305.3, num_updates=26900, lr=0.000385615, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=24848 end of epoch 16 (average epoch stats below) epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 epoch 016 | loss 3.556 | nll_loss 2.008 | ppl 4.02 | wps 540407 | ups 1.09 | wpb 495146 | bsz 16505.3 | num_updates 26983 | lr 0.000385021 | gnorm 0.175 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.3 | wall 24922 Start iterating over samples epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 epoch 017: 17 / 1689 loss=3.56, nll_loss=2.013, ppl=4.03, wps=548931, ups=1.12, wpb=491098, bsz=16587.5, num_updates=27000, lr=0.0003849, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=24938 begin validation on "valid" subset epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.733 | nll_loss 2.186 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 27000 | best_loss 3.72 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 118 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=490047, ups=0.99, wpb=495908, bsz=16486, num_updates=27100, lr=0.000384189, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.5, wall=25039 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 218 / 1689 loss=3.54, nll_loss=1.988, ppl=3.97, wps=550505, ups=1.11, wpb=495141, bsz=16344.7, num_updates=27200, lr=0.000383482, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=25129 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 318 / 1689 loss=3.534, nll_loss=1.982, ppl=3.95, wps=548816, ups=1.11, wpb=494354, bsz=16529.4, num_updates=27300, lr=0.00038278, gnorm=0.168, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25219 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 418 / 1689 loss=3.537, nll_loss=1.986, ppl=3.96, wps=552983, ups=1.12, wpb=495941, bsz=16605, num_updates=27400, lr=0.00038208, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=25309 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 518 / 1689 loss=3.551, nll_loss=2.001, ppl=4, wps=551990, ups=1.12, wpb=494230, bsz=16426.9, num_updates=27500, lr=0.000381385, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=25398 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 619 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=546893, ups=1.11, wpb=493730, bsz=16174.1, num_updates=27600, lr=0.000380693, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=25488 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 719 / 1689 loss=3.547, nll_loss=1.998, ppl=3.99, wps=549522, ups=1.11, wpb=495407, bsz=16297.5, num_updates=27700, lr=0.000380006, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.6, wall=25579 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 819 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=551867, ups=1.11, wpb=496593, bsz=17044.2, num_updates=27800, lr=0.000379322, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=25669 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 919 / 1689 loss=3.549, nll_loss=2, ppl=4, wps=551946, ups=1.11, wpb=495674, bsz=16679.1, num_updates=27900, lr=0.000378641, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=25758 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 epoch 017: 1019 / 1689 loss=3.553, nll_loss=2.004, ppl=4.01, wps=550729, ups=1.11, wpb=494444, bsz=16295.8, num_updates=28000, lr=0.000377964, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=25848 begin validation on "valid" subset epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017 | valid on 'valid' subset | loss 3.72 | nll_loss 2.175 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 28000 | best_loss 3.72 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1119 / 1689 loss=3.55, nll_loss=2.001, ppl=4, wps=458496, ups=0.92, wpb=496736, bsz=16783.2, num_updates=28100, lr=0.000377291, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=25957 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1220 / 1689 loss=3.548, nll_loss=1.998, ppl=4, wps=548896, ups=1.11, wpb=495140, bsz=16213.9, num_updates=28200, lr=0.000376622, gnorm=0.17, clip=0, loss_scale=2, train_wall=89, gb_free=22.4, wall=26047 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1320 / 1689 loss=3.552, nll_loss=2.003, ppl=4.01, wps=556507, ups=1.12, wpb=494817, bsz=16407.2, num_updates=28300, lr=0.000375956, gnorm=0.171, clip=0, loss_scale=2, train_wall=87, gb_free=22.4, wall=26136 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1421 / 1689 loss=3.55, nll_loss=2.002, ppl=4, wps=546098, ups=1.1, wpb=495598, bsz=16539.3, num_updates=28400, lr=0.000375293, gnorm=0.173, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=26226 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1521 / 1689 loss=3.552, nll_loss=2.004, ppl=4.01, wps=555464, ups=1.12, wpb=496913, bsz=16624.1, num_updates=28500, lr=0.000374634, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=26316 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 epoch 017: 1621 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551676, ups=1.11, wpb=495580, bsz=16836.8, num_updates=28600, lr=0.000373979, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=26406 end of epoch 17 (average epoch stats below) epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 epoch 017 | loss 3.547 | nll_loss 1.997 | ppl 3.99 | wps 540276 | ups 1.09 | wpb 495127 | bsz 16498.7 | num_updates 28668 | lr 0.000373535 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 21.4 | wall 26466 Start iterating over samples epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 32 / 1689 loss=3.545, nll_loss=1.995, ppl=3.99, wps=543392, ups=1.11, wpb=490315, bsz=16099.1, num_updates=28700, lr=0.000373327, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=21.9, wall=26496 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 132 / 1689 loss=3.521, nll_loss=1.968, ppl=3.91, wps=550663, ups=1.11, wpb=494910, bsz=16435.4, num_updates=28800, lr=0.000372678, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=26586 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 232 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=553424, ups=1.12, wpb=496026, bsz=16319.2, num_updates=28900, lr=0.000372033, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=26675 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 epoch 018: 332 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=556320, ups=1.12, wpb=495828, bsz=16604.8, num_updates=29000, lr=0.000371391, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=26765 begin validation on "valid" subset epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 begin validation on "valid" subset epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018 | valid on 'valid' subset | loss 3.729 | nll_loss 2.184 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 29000 | best_loss 3.72 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 433 / 1689 loss=3.526, nll_loss=1.974, ppl=3.93, wps=437922, ups=0.88, wpb=498560, bsz=16674.9, num_updates=29100, lr=0.000370752, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=26878 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 533 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551744, ups=1.11, wpb=495374, bsz=16427.5, num_updates=29200, lr=0.000370117, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=26968 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 633 / 1689 loss=3.534, nll_loss=1.983, ppl=3.95, wps=554088, ups=1.12, wpb=496050, bsz=16266.5, num_updates=29300, lr=0.000369484, gnorm=0.181, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27058 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 733 / 1689 loss=3.538, nll_loss=1.987, ppl=3.96, wps=555413, ups=1.12, wpb=495605, bsz=16565.3, num_updates=29400, lr=0.000368856, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=27147 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 833 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=554950, ups=1.12, wpb=494788, bsz=16937.3, num_updates=29500, lr=0.00036823, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=27236 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 933 / 1689 loss=3.541, nll_loss=1.991, ppl=3.97, wps=557378, ups=1.12, wpb=495998, bsz=16399.4, num_updates=29600, lr=0.000367607, gnorm=0.17, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=27325 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1033 / 1689 loss=3.542, nll_loss=1.992, ppl=3.98, wps=551722, ups=1.12, wpb=494670, bsz=16384.6, num_updates=29700, lr=0.000366988, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=27415 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1133 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=549635, ups=1.11, wpb=495469, bsz=16618.7, num_updates=29800, lr=0.000366372, gnorm=0.176, clip=0, loss_scale=2, train_wall=89, gb_free=21.6, wall=27505 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1234 / 1689 loss=3.538, nll_loss=1.988, ppl=3.97, wps=543332, ups=1.1, wpb=495722, bsz=16598.6, num_updates=29900, lr=0.000365758, gnorm=0.171, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=27596 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 epoch 018: 1334 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=557956, ups=1.13, wpb=495241, bsz=16330.1, num_updates=30000, lr=0.000365148, gnorm=0.176, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=27685 begin validation on "valid" subset epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018 | valid on 'valid' subset | loss 3.714 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 30000 | best_loss 3.714 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1434 / 1689 loss=3.548, nll_loss=1.999, ppl=4, wps=284445, ups=0.57, wpb=494862, bsz=16356.2, num_updates=30100, lr=0.000364541, gnorm=0.174, clip=0, loss_scale=1, train_wall=92, gb_free=21.9, wall=27859 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1534 / 1689 loss=3.549, nll_loss=2.001, ppl=4, wps=560910, ups=1.13, wpb=495259, bsz=16319.8, num_updates=30200, lr=0.000363937, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=27947 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 epoch 018: 1634 / 1689 loss=3.559, nll_loss=2.011, ppl=4.03, wps=551194, ups=1.12, wpb=492139, bsz=16800.9, num_updates=30300, lr=0.000363336, gnorm=0.177, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=28037 end of epoch 18 (average epoch stats below) epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 epoch 018 | loss 3.538 | nll_loss 1.988 | ppl 3.97 | wps 516144 | ups 1.04 | wpb 495124 | bsz 16505.2 | num_updates 30355 | lr 0.000363007 | gnorm 0.174 | clip 0 | loss_scale 2 | train_wall 1489 | gb_free 22.9 | wall 28084 Start iterating over samples epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 45 / 1689 loss=3.535, nll_loss=1.984, ppl=3.95, wps=555196, ups=1.13, wpb=491958, bsz=16574.6, num_updates=30400, lr=0.000362738, gnorm=0.174, clip=0, loss_scale=2, train_wall=87, gb_free=22.3, wall=28125 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 145 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=558908, ups=1.13, wpb=496519, bsz=16885.4, num_updates=30500, lr=0.000362143, gnorm=0.169, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=28214 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 246 / 1689 loss=3.519, nll_loss=1.965, ppl=3.9, wps=553061, ups=1.11, wpb=496712, bsz=16530.9, num_updates=30600, lr=0.000361551, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28304 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 346 / 1689 loss=3.521, nll_loss=1.967, ppl=3.91, wps=551303, ups=1.11, wpb=495666, bsz=16678.2, num_updates=30700, lr=0.000360961, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=28394 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 446 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=555671, ups=1.12, wpb=496067, bsz=16085.8, num_updates=30800, lr=0.000360375, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=28483 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 546 / 1689 loss=3.532, nll_loss=1.98, ppl=3.95, wps=557438, ups=1.12, wpb=496530, bsz=16598.7, num_updates=30900, lr=0.000359791, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=28572 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 epoch 019: 646 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=553190, ups=1.12, wpb=495648, bsz=16839.9, num_updates=31000, lr=0.000359211, gnorm=0.179, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=28662 begin validation on "valid" subset epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.723 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 31000 | best_loss 3.714 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 746 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=482525, ups=0.97, wpb=495118, bsz=16715.1, num_updates=31100, lr=0.000358633, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=28764 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 846 / 1689 loss=3.53, nll_loss=1.978, ppl=3.94, wps=551010, ups=1.11, wpb=494888, bsz=16419.1, num_updates=31200, lr=0.000358057, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=28854 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 946 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=556521, ups=1.12, wpb=496240, bsz=16241.4, num_updates=31300, lr=0.000357485, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=28943 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1046 / 1689 loss=3.535, nll_loss=1.984, ppl=3.96, wps=551912, ups=1.12, wpb=494339, bsz=16381.6, num_updates=31400, lr=0.000356915, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=29033 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1146 / 1689 loss=3.54, nll_loss=1.99, ppl=3.97, wps=551943, ups=1.12, wpb=493574, bsz=16341.2, num_updates=31500, lr=0.000356348, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=29122 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1247 / 1689 loss=3.544, nll_loss=1.995, ppl=3.99, wps=547038, ups=1.11, wpb=494024, bsz=16846.2, num_updates=31600, lr=0.000355784, gnorm=0.177, clip=0, loss_scale=2, train_wall=89, gb_free=22.5, wall=29213 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1347 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=550282, ups=1.11, wpb=495385, bsz=16699.3, num_updates=31700, lr=0.000355222, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29303 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1447 / 1689 loss=3.536, nll_loss=1.986, ppl=3.96, wps=554172, ups=1.12, wpb=495031, bsz=16270, num_updates=31800, lr=0.000354663, gnorm=0.184, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=29392 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1547 / 1689 loss=3.534, nll_loss=1.984, ppl=3.96, wps=554167, ups=1.12, wpb=495493, bsz=16233, num_updates=31900, lr=0.000354107, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=29481 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 epoch 019: 1648 / 1689 loss=3.541, nll_loss=1.992, ppl=3.98, wps=547047, ups=1.1, wpb=495247, bsz=16541.4, num_updates=32000, lr=0.000353553, gnorm=0.179, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=29572 begin validation on "valid" subset epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 epoch 019 | valid on 'valid' subset | loss 3.718 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 32000 | best_loss 3.714 end of epoch 19 (average epoch stats below) epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 epoch 019 | loss 3.53 | nll_loss 1.979 | ppl 3.94 | wps 543803 | ups 1.1 | wpb 495120 | bsz 16506.1 | num_updates 32041 | lr 0.000353327 | gnorm 0.174 | clip 0 | loss_scale 1 | train_wall 1486 | gb_free 23.7 | wall 29619 Start iterating over samples epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 59 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=485847, ups=0.99, wpb=490816, bsz=16311.4, num_updates=32100, lr=0.000353002, gnorm=0.178, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=29673 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 159 / 1689 loss=3.507, nll_loss=1.952, ppl=3.87, wps=553934, ups=1.12, wpb=495899, bsz=16164.4, num_updates=32200, lr=0.000352454, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=29762 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 259 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=550394, ups=1.11, wpb=495064, bsz=16453.4, num_updates=32300, lr=0.000351908, gnorm=0.184, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=29852 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 359 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=553079, ups=1.12, wpb=495155, bsz=16646, num_updates=32400, lr=0.000351364, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.1, wall=29942 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 459 / 1689 loss=3.512, nll_loss=1.958, ppl=3.89, wps=552563, ups=1.11, wpb=497373, bsz=16753.2, num_updates=32500, lr=0.000350823, gnorm=0.172, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30032 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 559 / 1689 loss=3.523, nll_loss=1.97, ppl=3.92, wps=547777, ups=1.11, wpb=494836, bsz=16751.6, num_updates=32600, lr=0.000350285, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=30122 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 659 / 1689 loss=3.528, nll_loss=1.977, ppl=3.94, wps=556038, ups=1.12, wpb=494869, bsz=16353.3, num_updates=32700, lr=0.000349749, gnorm=0.168, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=30211 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 759 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=556424, ups=1.12, wpb=496803, bsz=17062.5, num_updates=32800, lr=0.000349215, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=30301 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 859 / 1689 loss=3.525, nll_loss=1.973, ppl=3.93, wps=552932, ups=1.12, wpb=493965, bsz=16355.1, num_updates=32900, lr=0.000348684, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=30390 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 epoch 020: 959 / 1689 loss=3.532, nll_loss=1.981, ppl=3.95, wps=551305, ups=1.11, wpb=494632, bsz=16447.3, num_updates=33000, lr=0.000348155, gnorm=0.169, clip=0, loss_scale=4, train_wall=88, gb_free=22.4, wall=30480 begin validation on "valid" subset epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020 | valid on 'valid' subset | loss 3.725 | nll_loss 2.178 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 33000 | best_loss 3.714 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1060 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=475724, ups=0.96, wpb=494643, bsz=16267.8, num_updates=33100, lr=0.000347629, gnorm=0.172, clip=0, loss_scale=2, train_wall=89, gb_free=22.1, wall=30584 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1160 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=551427, ups=1.11, wpb=495107, bsz=16623.6, num_updates=33200, lr=0.000347105, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.1, wall=30673 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1260 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=557466, ups=1.13, wpb=495189, bsz=16638.2, num_updates=33300, lr=0.000346583, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.5, wall=30762 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1360 / 1689 loss=3.534, nll_loss=1.984, ppl=3.95, wps=552866, ups=1.12, wpb=494987, bsz=16414.5, num_updates=33400, lr=0.000346064, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=30852 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1460 / 1689 loss=3.533, nll_loss=1.983, ppl=3.95, wps=553889, ups=1.12, wpb=495011, bsz=16525.9, num_updates=33500, lr=0.000345547, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=30941 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1561 / 1689 loss=3.533, nll_loss=1.982, ppl=3.95, wps=550914, ups=1.11, wpb=494705, bsz=16209.3, num_updates=33600, lr=0.000345033, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=31031 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 epoch 020: 1661 / 1689 loss=3.531, nll_loss=1.98, ppl=3.95, wps=555889, ups=1.12, wpb=496510, bsz=16555.5, num_updates=33700, lr=0.00034452, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=31120 end of epoch 20 (average epoch stats below) epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 epoch 020 | loss 3.523 | nll_loss 1.971 | ppl 3.92 | wps 547687 | ups 1.11 | wpb 495124 | bsz 16505.7 | num_updates 33728 | lr 0.000344377 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1484 | gb_free 23.5 | wall 31144 Start iterating over samples epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 72 / 1689 loss=3.505, nll_loss=1.95, ppl=3.86, wps=548238, ups=1.11, wpb=492237, bsz=16275.6, num_updates=33800, lr=0.00034401, gnorm=0.175, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=31210 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 172 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=555364, ups=1.12, wpb=494930, bsz=16446.8, num_updates=33900, lr=0.000343503, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31299 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 epoch 021: 273 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=547083, ups=1.1, wpb=495602, bsz=16566.5, num_updates=34000, lr=0.000342997, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=31390 begin validation on "valid" subset epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021 | valid on 'valid' subset | loss 3.717 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 34000 | best_loss 3.714 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 373 / 1689 loss=3.51, nll_loss=1.956, ppl=3.88, wps=486704, ups=0.99, wpb=494054, bsz=16774.6, num_updates=34100, lr=0.000342494, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=31491 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 473 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=555088, ups=1.12, wpb=494689, bsz=16751.4, num_updates=34200, lr=0.000341993, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=31580 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 573 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=559983, ups=1.13, wpb=494700, bsz=16606.2, num_updates=34300, lr=0.000341494, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=31669 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 673 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=558503, ups=1.13, wpb=495287, bsz=16496, num_updates=34400, lr=0.000340997, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=31757 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 773 / 1689 loss=3.522, nll_loss=1.969, ppl=3.92, wps=557036, ups=1.13, wpb=494992, bsz=16660.7, num_updates=34500, lr=0.000340503, gnorm=0.178, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=31846 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 873 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=557067, ups=1.12, wpb=495201, bsz=16241.3, num_updates=34600, lr=0.00034001, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=22.6, wall=31935 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 973 / 1689 loss=3.514, nll_loss=1.96, ppl=3.89, wps=560426, ups=1.13, wpb=496588, bsz=16303.4, num_updates=34700, lr=0.00033952, gnorm=0.179, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32024 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1074 / 1689 loss=3.518, nll_loss=1.966, ppl=3.91, wps=552069, ups=1.11, wpb=495152, bsz=16421.9, num_updates=34800, lr=0.000339032, gnorm=0.18, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=32113 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1174 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=556165, ups=1.12, wpb=495333, bsz=16790.2, num_updates=34900, lr=0.000338546, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=20.4, wall=32203 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 epoch 021: 1274 / 1689 loss=3.521, nll_loss=1.969, ppl=3.92, wps=554248, ups=1.12, wpb=494773, bsz=16614, num_updates=35000, lr=0.000338062, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=32292 begin validation on "valid" subset epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021 | valid on 'valid' subset | loss 3.709 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 35000 | best_loss 3.709 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1374 / 1689 loss=3.524, nll_loss=1.973, ppl=3.92, wps=457360, ups=0.92, wpb=495182, bsz=16174.9, num_updates=35100, lr=0.00033758, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=32400 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1474 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=554748, ups=1.12, wpb=497438, bsz=16355.9, num_updates=35200, lr=0.0003371, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=32490 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1574 / 1689 loss=3.529, nll_loss=1.978, ppl=3.94, wps=555542, ups=1.12, wpb=496351, bsz=16446.3, num_updates=35300, lr=0.000336622, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=32579 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 epoch 021: 1674 / 1689 loss=3.524, nll_loss=1.972, ppl=3.92, wps=547955, ups=1.11, wpb=494640, bsz=16514.4, num_updates=35400, lr=0.000336146, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=32669 end of epoch 21 (average epoch stats below) epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 epoch 021 | loss 3.516 | nll_loss 1.963 | ppl 3.9 | wps 543044 | ups 1.1 | wpb 495119 | bsz 16507.6 | num_updates 35415 | lr 0.000336075 | gnorm 0.172 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 24 | wall 32683 Start iterating over samples epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 85 / 1689 loss=3.49, nll_loss=1.933, ppl=3.82, wps=540256, ups=1.1, wpb=491104, bsz=16607.2, num_updates=35500, lr=0.000335673, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=32760 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 185 / 1689 loss=3.49, nll_loss=1.932, ppl=3.82, wps=554727, ups=1.12, wpb=493534, bsz=15971.9, num_updates=35600, lr=0.000335201, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=32849 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 285 / 1689 loss=3.499, nll_loss=1.943, ppl=3.85, wps=550284, ups=1.11, wpb=493998, bsz=16699.6, num_updates=35700, lr=0.000334731, gnorm=0.173, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=32939 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 386 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=546287, ups=1.1, wpb=495922, bsz=16761.1, num_updates=35800, lr=0.000334263, gnorm=0.173, clip=0, loss_scale=2, train_wall=89, gb_free=21.2, wall=33030 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 487 / 1689 loss=3.5, nll_loss=1.945, ppl=3.85, wps=541384, ups=1.09, wpb=495503, bsz=16555.1, num_updates=35900, lr=0.000333797, gnorm=0.169, clip=0, loss_scale=1, train_wall=90, gb_free=22.4, wall=33121 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 epoch 022: 587 / 1689 loss=3.508, nll_loss=1.954, ppl=3.88, wps=551044, ups=1.11, wpb=495599, bsz=16638.7, num_updates=36000, lr=0.000333333, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=33211 begin validation on "valid" subset epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.718 | nll_loss 2.172 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 36000 | best_loss 3.709 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 687 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=376488, ups=0.76, wpb=495757, bsz=16457.6, num_updates=36100, lr=0.000332871, gnorm=0.172, clip=0, loss_scale=1, train_wall=113, gb_free=21.1, wall=33343 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 787 / 1689 loss=3.511, nll_loss=1.957, ppl=3.88, wps=551576, ups=1.12, wpb=494549, bsz=16290.8, num_updates=36200, lr=0.000332411, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=33433 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 887 / 1689 loss=3.515, nll_loss=1.961, ppl=3.89, wps=548949, ups=1.11, wpb=494591, bsz=16698.6, num_updates=36300, lr=0.000331953, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=33523 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 987 / 1689 loss=3.516, nll_loss=1.964, ppl=3.9, wps=555456, ups=1.12, wpb=496404, bsz=16628.1, num_updates=36400, lr=0.000331497, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=33612 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1087 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=552704, ups=1.12, wpb=495099, bsz=16580.6, num_updates=36500, lr=0.000331042, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=33702 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1187 / 1689 loss=3.519, nll_loss=1.966, ppl=3.91, wps=553891, ups=1.12, wpb=495829, bsz=16336.7, num_updates=36600, lr=0.00033059, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.4, wall=33791 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1287 / 1689 loss=3.515, nll_loss=1.962, ppl=3.9, wps=552884, ups=1.11, wpb=497133, bsz=16472.2, num_updates=36700, lr=0.000330139, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=33881 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1387 / 1689 loss=3.523, nll_loss=1.971, ppl=3.92, wps=553304, ups=1.12, wpb=495797, bsz=16639.4, num_updates=36800, lr=0.00032969, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.8, wall=33971 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1488 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=542729, ups=1.1, wpb=495161, bsz=16267.8, num_updates=36900, lr=0.000329243, gnorm=0.167, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=34062 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 epoch 022: 1588 / 1689 loss=3.52, nll_loss=1.968, ppl=3.91, wps=549552, ups=1.11, wpb=495502, bsz=16642.2, num_updates=37000, lr=0.000328798, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=34152 begin validation on "valid" subset epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022 | valid on 'valid' subset | loss 3.712 | nll_loss 2.166 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 37000 | best_loss 3.709 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 epoch 022: 1688 / 1689 loss=3.519, nll_loss=1.967, ppl=3.91, wps=489474, ups=0.99, wpb=495407, bsz=16520.2, num_updates=37100, lr=0.000328355, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=34253 end of epoch 22 (average epoch stats below) epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 epoch 022 | loss 3.51 | nll_loss 1.956 | ppl 3.88 | wps 531328 | ups 1.07 | wpb 495105 | bsz 16506.1 | num_updates 37101 | lr 0.00032835 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 22.7 | wall 34254 Start iterating over samples epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 99 / 1689 loss=3.482, nll_loss=1.924, ppl=3.79, wps=547282, ups=1.11, wpb=491664, bsz=16436.9, num_updates=37200, lr=0.000327913, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=34343 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 200 / 1689 loss=3.488, nll_loss=1.931, ppl=3.81, wps=547730, ups=1.11, wpb=494951, bsz=17072.8, num_updates=37300, lr=0.000327473, gnorm=0.176, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=34434 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 300 / 1689 loss=3.496, nll_loss=1.94, ppl=3.84, wps=554979, ups=1.12, wpb=496848, bsz=16735.6, num_updates=37400, lr=0.000327035, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=34523 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 400 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554957, ups=1.12, wpb=495877, bsz=16563.5, num_updates=37500, lr=0.000326599, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=34612 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 500 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=553457, ups=1.12, wpb=495853, bsz=16804.6, num_updates=37600, lr=0.000326164, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=34702 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 600 / 1689 loss=3.503, nll_loss=1.948, ppl=3.86, wps=552137, ups=1.12, wpb=494238, bsz=16157.5, num_updates=37700, lr=0.000325731, gnorm=0.176, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=34792 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 700 / 1689 loss=3.501, nll_loss=1.946, ppl=3.85, wps=554650, ups=1.12, wpb=494433, bsz=16507.8, num_updates=37800, lr=0.0003253, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=34881 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 800 / 1689 loss=3.506, nll_loss=1.952, ppl=3.87, wps=553475, ups=1.12, wpb=496262, bsz=16506.1, num_updates=37900, lr=0.000324871, gnorm=0.166, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=34970 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 epoch 023: 900 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=550612, ups=1.11, wpb=494942, bsz=16893.6, num_updates=38000, lr=0.000324443, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=35060 begin validation on "valid" subset epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023 | valid on 'valid' subset | loss 3.725 | nll_loss 2.183 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 38000 | best_loss 3.709 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1001 / 1689 loss=3.504, nll_loss=1.95, ppl=3.86, wps=478684, ups=0.97, wpb=495167, bsz=16182.1, num_updates=38100, lr=0.000324017, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21, wall=35164 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1101 / 1689 loss=3.514, nll_loss=1.961, ppl=3.89, wps=558865, ups=1.13, wpb=494551, bsz=16336.2, num_updates=38200, lr=0.000323592, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=35252 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1201 / 1689 loss=3.508, nll_loss=1.954, ppl=3.87, wps=559828, ups=1.13, wpb=495512, bsz=16245.6, num_updates=38300, lr=0.00032317, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=20.6, wall=35341 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1301 / 1689 loss=3.513, nll_loss=1.96, ppl=3.89, wps=555981, ups=1.12, wpb=495281, bsz=16473.4, num_updates=38400, lr=0.000322749, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=35430 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1401 / 1689 loss=3.508, nll_loss=1.955, ppl=3.88, wps=554803, ups=1.12, wpb=494634, bsz=16467.2, num_updates=38500, lr=0.000322329, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=35519 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1502 / 1689 loss=3.507, nll_loss=1.953, ppl=3.87, wps=541611, ups=1.09, wpb=494947, bsz=16558, num_updates=38600, lr=0.000321911, gnorm=0.17, clip=0, loss_scale=1, train_wall=90, gb_free=22.2, wall=35610 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 epoch 023: 1602 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=558075, ups=1.12, wpb=496419, bsz=16336.3, num_updates=38700, lr=0.000321495, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=35699 end of epoch 23 (average epoch stats below) epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 epoch 023 | loss 3.504 | nll_loss 1.949 | ppl 3.86 | wps 548313 | ups 1.11 | wpb 495116 | bsz 16504.3 | num_updates 38787 | lr 0.000321134 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1487 | gb_free 23.7 | wall 35776 Start iterating over samples epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 13 / 1689 loss=3.509, nll_loss=1.956, ppl=3.88, wps=540893, ups=1.1, wpb=491512, bsz=16189.9, num_updates=38800, lr=0.000321081, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=35790 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 113 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=553549, ups=1.12, wpb=496381, bsz=16728.9, num_updates=38900, lr=0.000320668, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=35880 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 epoch 024: 213 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=551431, ups=1.11, wpb=496060, bsz=16788.5, num_updates=39000, lr=0.000320256, gnorm=0.182, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=35970 begin validation on "valid" subset epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024 | valid on 'valid' subset | loss 3.716 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 39000 | best_loss 3.709 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 313 / 1689 loss=3.486, nll_loss=1.928, ppl=3.81, wps=483719, ups=0.98, wpb=495322, bsz=16253.7, num_updates=39100, lr=0.000319847, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=36072 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 413 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=551506, ups=1.11, wpb=494796, bsz=16402.6, num_updates=39200, lr=0.000319438, gnorm=0.175, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=36162 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 513 / 1689 loss=3.494, nll_loss=1.939, ppl=3.83, wps=559195, ups=1.13, wpb=496748, bsz=16595.8, num_updates=39300, lr=0.000319032, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=22.2, wall=36251 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 614 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=548849, ups=1.11, wpb=495031, bsz=16382, num_updates=39400, lr=0.000318626, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=36341 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 714 / 1689 loss=3.491, nll_loss=1.935, ppl=3.82, wps=552889, ups=1.12, wpb=494225, bsz=16342.1, num_updates=39500, lr=0.000318223, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=36430 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 814 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=553604, ups=1.11, wpb=496716, bsz=16361, num_updates=39600, lr=0.000317821, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=36520 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 914 / 1689 loss=3.498, nll_loss=1.943, ppl=3.85, wps=554300, ups=1.12, wpb=495694, bsz=16607.3, num_updates=39700, lr=0.00031742, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=36610 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1014 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=552976, ups=1.12, wpb=495421, bsz=16530.2, num_updates=39800, lr=0.000317021, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=36699 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1114 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=553547, ups=1.12, wpb=495865, bsz=16898.1, num_updates=39900, lr=0.000316624, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=36789 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 epoch 024: 1214 / 1689 loss=3.5, nll_loss=1.946, ppl=3.85, wps=552146, ups=1.12, wpb=494599, bsz=16687.8, num_updates=40000, lr=0.000316228, gnorm=0.18, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=36878 begin validation on "valid" subset epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024 | valid on 'valid' subset | loss 3.708 | nll_loss 2.162 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 40000 | best_loss 3.708 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1314 / 1689 loss=3.502, nll_loss=1.947, ppl=3.86, wps=456889, ups=0.92, wpb=494659, bsz=16573.2, num_updates=40100, lr=0.000315833, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.6, wall=36987 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1414 / 1689 loss=3.505, nll_loss=1.951, ppl=3.87, wps=555340, ups=1.12, wpb=493950, bsz=16139.3, num_updates=40200, lr=0.00031544, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.6, wall=37075 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1514 / 1689 loss=3.512, nll_loss=1.959, ppl=3.89, wps=555624, ups=1.12, wpb=495209, bsz=16564.4, num_updates=40300, lr=0.000315049, gnorm=0.169, clip=0, loss_scale=2, train_wall=87, gb_free=22.2, wall=37165 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 epoch 024: 1614 / 1689 loss=3.51, nll_loss=1.958, ppl=3.88, wps=554535, ups=1.12, wpb=496241, bsz=16528.7, num_updates=40400, lr=0.000314658, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=37254 end of epoch 24 (average epoch stats below) epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 epoch 024 | loss 3.498 | nll_loss 1.943 | ppl 3.84 | wps 540851 | ups 1.09 | wpb 495131 | bsz 16507.5 | num_updates 40474 | lr 0.000314371 | gnorm 0.17 | clip 0 | loss_scale 2 | train_wall 1486 | gb_free 23 | wall 37320 Start iterating over samples epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 26 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=545819, ups=1.11, wpb=490941, bsz=16119.2, num_updates=40500, lr=0.00031427, gnorm=0.177, clip=0, loss_scale=2, train_wall=87, gb_free=21.7, wall=37344 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 126 / 1689 loss=3.477, nll_loss=1.918, ppl=3.78, wps=554967, ups=1.12, wpb=494896, bsz=16302.2, num_updates=40600, lr=0.000313882, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=37433 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 226 / 1689 loss=3.48, nll_loss=1.922, ppl=3.79, wps=555782, ups=1.12, wpb=496756, bsz=16525.3, num_updates=40700, lr=0.000313497, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=37523 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 326 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=550989, ups=1.11, wpb=496459, bsz=16704.8, num_updates=40800, lr=0.000313112, gnorm=0.171, clip=0, loss_scale=2, train_wall=89, gb_free=21.5, wall=37613 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 427 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=548606, ups=1.11, wpb=495804, bsz=16557.4, num_updates=40900, lr=0.000312729, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=37703 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 epoch 025: 527 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=550140, ups=1.11, wpb=494760, bsz=16638.6, num_updates=41000, lr=0.000312348, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=37793 begin validation on "valid" subset epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025 | valid on 'valid' subset | loss 3.718 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 41000 | best_loss 3.708 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 627 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=384359, ups=0.78, wpb=494964, bsz=16365, num_updates=41100, lr=0.000311967, gnorm=0.164, clip=0, loss_scale=1, train_wall=99, gb_free=21.8, wall=37922 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 727 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=557493, ups=1.12, wpb=495659, bsz=16779.1, num_updates=41200, lr=0.000311588, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=38011 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 827 / 1689 loss=3.498, nll_loss=1.943, ppl=3.84, wps=555396, ups=1.12, wpb=495297, bsz=16309.7, num_updates=41300, lr=0.000311211, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=38100 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 927 / 1689 loss=3.503, nll_loss=1.949, ppl=3.86, wps=553331, ups=1.12, wpb=494568, bsz=16887.3, num_updates=41400, lr=0.000310835, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=38189 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1028 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=550731, ups=1.11, wpb=494243, bsz=16500.6, num_updates=41500, lr=0.00031046, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38279 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1128 / 1689 loss=3.492, nll_loss=1.937, ppl=3.83, wps=549622, ups=1.11, wpb=494678, bsz=16182.6, num_updates=41600, lr=0.000310087, gnorm=0.174, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=38369 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1228 / 1689 loss=3.499, nll_loss=1.944, ppl=3.85, wps=554074, ups=1.12, wpb=494796, bsz=16369.6, num_updates=41700, lr=0.000309715, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=38458 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1328 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=551007, ups=1.11, wpb=497528, bsz=16635.6, num_updates=41800, lr=0.000309344, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.1, wall=38549 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1428 / 1689 loss=3.496, nll_loss=1.941, ppl=3.84, wps=547441, ups=1.11, wpb=494895, bsz=16431.4, num_updates=41900, lr=0.000308975, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=38639 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 epoch 025: 1529 / 1689 loss=3.502, nll_loss=1.948, ppl=3.86, wps=544578, ups=1.1, wpb=494246, bsz=16797, num_updates=42000, lr=0.000308607, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=38730 begin validation on "valid" subset epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025 | valid on 'valid' subset | loss 3.704 | nll_loss 2.16 | ppl 4.47 | wps 0 | wpb 44526 | bsz 2008 | num_updates 42000 | best_loss 3.704 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 epoch 025: 1629 / 1689 loss=3.499, nll_loss=1.945, ppl=3.85, wps=366154, ups=0.74, wpb=495420, bsz=16322.4, num_updates=42100, lr=0.00030824, gnorm=0.166, clip=0, loss_scale=1, train_wall=111, gb_free=21.8, wall=38865 end of epoch 25 (average epoch stats below) epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 epoch 025 | loss 3.492 | nll_loss 1.937 | ppl 3.83 | wps 522388 | ups 1.06 | wpb 495121 | bsz 16508.6 | num_updates 42160 | lr 0.000308021 | gnorm 0.168 | clip 0 | loss_scale 1 | train_wall 1524 | gb_free 23.7 | wall 38918 Start iterating over samples epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 40 / 1689 loss=3.485, nll_loss=1.928, ppl=3.81, wps=547966, ups=1.11, wpb=492013, bsz=16520.6, num_updates=42200, lr=0.000307875, gnorm=0.174, clip=0, loss_scale=1, train_wall=87, gb_free=22.3, wall=38955 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 140 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=552065, ups=1.12, wpb=495073, bsz=16471.7, num_updates=42300, lr=0.00030751, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=39045 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 240 / 1689 loss=3.479, nll_loss=1.921, ppl=3.79, wps=551072, ups=1.11, wpb=496074, bsz=16859, num_updates=42400, lr=0.000307148, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=39135 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 340 / 1689 loss=3.481, nll_loss=1.924, ppl=3.79, wps=547202, ups=1.11, wpb=494564, bsz=16748.8, num_updates=42500, lr=0.000306786, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=39225 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 440 / 1689 loss=3.481, nll_loss=1.923, ppl=3.79, wps=552834, ups=1.12, wpb=494763, bsz=16201.2, num_updates=42600, lr=0.000306426, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=22.3, wall=39314 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 540 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=550082, ups=1.11, wpb=495513, bsz=16576, num_updates=42700, lr=0.000306067, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=39405 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 641 / 1689 loss=3.485, nll_loss=1.928, ppl=3.8, wps=550567, ups=1.11, wpb=495312, bsz=16084.3, num_updates=42800, lr=0.000305709, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=39495 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 741 / 1689 loss=3.487, nll_loss=1.93, ppl=3.81, wps=553474, ups=1.12, wpb=496279, bsz=16689.1, num_updates=42900, lr=0.000305352, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=39584 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 epoch 026: 841 / 1689 loss=3.492, nll_loss=1.936, ppl=3.83, wps=549627, ups=1.11, wpb=495153, bsz=16531.2, num_updates=43000, lr=0.000304997, gnorm=0.172, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=39674 begin validation on "valid" subset epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026 | valid on 'valid' subset | loss 3.72 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 43000 | best_loss 3.704 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 942 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=474929, ups=0.96, wpb=495678, bsz=16511.4, num_updates=43100, lr=0.000304643, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.1, wall=39779 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1042 / 1689 loss=3.493, nll_loss=1.937, ppl=3.83, wps=552756, ups=1.12, wpb=495267, bsz=16302.6, num_updates=43200, lr=0.00030429, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.4, wall=39868 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1142 / 1689 loss=3.494, nll_loss=1.938, ppl=3.83, wps=554371, ups=1.12, wpb=495655, bsz=16972.5, num_updates=43300, lr=0.000303939, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=39958 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1242 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=553652, ups=1.12, wpb=495558, bsz=16288.2, num_updates=43400, lr=0.000303588, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.6, wall=40047 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1342 / 1689 loss=3.49, nll_loss=1.934, ppl=3.82, wps=548165, ups=1.11, wpb=495464, bsz=16426.7, num_updates=43500, lr=0.000303239, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=40138 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1442 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=553512, ups=1.12, wpb=495761, bsz=16645.5, num_updates=43600, lr=0.000302891, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40227 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1542 / 1689 loss=3.501, nll_loss=1.947, ppl=3.86, wps=552586, ups=1.12, wpb=494621, bsz=16353.7, num_updates=43700, lr=0.000302545, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=40317 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 epoch 026: 1642 / 1689 loss=3.494, nll_loss=1.939, ppl=3.84, wps=550234, ups=1.11, wpb=495190, bsz=16476.9, num_updates=43800, lr=0.000302199, gnorm=0.178, clip=0, loss_scale=1, train_wall=89, gb_free=21.7, wall=40407 end of epoch 26 (average epoch stats below) epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 epoch 026 | loss 3.487 | nll_loss 1.931 | ppl 3.81 | wps 546002 | ups 1.1 | wpb 495116 | bsz 16506.6 | num_updates 43847 | lr 0.000302037 | gnorm 0.17 | clip 0 | loss_scale 1 | train_wall 1492 | gb_free 23.1 | wall 40448 Start iterating over samples epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 53 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=545898, ups=1.11, wpb=490338, bsz=16033.4, num_updates=43900, lr=0.000301855, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=40496 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 epoch 027: 153 / 1689 loss=3.462, nll_loss=1.902, ppl=3.74, wps=552651, ups=1.11, wpb=495877, bsz=16485.4, num_updates=44000, lr=0.000301511, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=40586 begin validation on "valid" subset epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 44000 | best_loss 3.704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 253 / 1689 loss=3.473, nll_loss=1.914, ppl=3.77, wps=421320, ups=0.85, wpb=495955, bsz=16074.2, num_updates=44100, lr=0.000301169, gnorm=0.163, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=40704 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 353 / 1689 loss=3.471, nll_loss=1.912, ppl=3.76, wps=560380, ups=1.13, wpb=495458, bsz=16505, num_updates=44200, lr=0.000300828, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=40792 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 453 / 1689 loss=3.476, nll_loss=1.918, ppl=3.78, wps=555889, ups=1.12, wpb=496075, bsz=16420.6, num_updates=44300, lr=0.000300489, gnorm=0.174, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=40882 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 553 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=559426, ups=1.13, wpb=496658, bsz=16289.8, num_updates=44400, lr=0.00030015, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=22.4, wall=40970 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 653 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=552540, ups=1.12, wpb=493955, bsz=16640.5, num_updates=44500, lr=0.000299813, gnorm=0.17, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=41060 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 753 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=556542, ups=1.13, wpb=493879, bsz=16803.5, num_updates=44600, lr=0.000299476, gnorm=0.17, clip=0, loss_scale=4, train_wall=87, gb_free=21.9, wall=41149 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 854 / 1689 loss=3.486, nll_loss=1.929, ppl=3.81, wps=553727, ups=1.12, wpb=494919, bsz=16712.8, num_updates=44700, lr=0.000299141, gnorm=0.175, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=41238 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 954 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=553753, ups=1.12, wpb=495320, bsz=16196.5, num_updates=44800, lr=0.000298807, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=41327 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1055 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=550196, ups=1.11, wpb=494349, bsz=16842, num_updates=44900, lr=0.000298474, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=41417 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 epoch 027: 1155 / 1689 loss=3.488, nll_loss=1.932, ppl=3.82, wps=552901, ups=1.12, wpb=494956, bsz=17000.2, num_updates=45000, lr=0.000298142, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=20.7, wall=41507 begin validation on "valid" subset epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027 | valid on 'valid' subset | loss 3.702 | nll_loss 2.159 | ppl 4.46 | wps 0 | wpb 44526 | bsz 2008 | num_updates 45000 | best_loss 3.702 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1255 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=359714, ups=0.73, wpb=495820, bsz=16303.6, num_updates=45100, lr=0.000297812, gnorm=0.172, clip=0, loss_scale=1, train_wall=111, gb_free=22, wall=41645 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1355 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=552345, ups=1.11, wpb=495956, bsz=16524.2, num_updates=45200, lr=0.000297482, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=41734 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1455 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=561202, ups=1.13, wpb=496269, bsz=16355.1, num_updates=45300, lr=0.000297154, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=41823 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1556 / 1689 loss=3.495, nll_loss=1.94, ppl=3.84, wps=555637, ups=1.12, wpb=494890, bsz=16399.8, num_updates=45400, lr=0.000296826, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=41912 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 epoch 027: 1656 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=557981, ups=1.13, wpb=495608, bsz=16772.6, num_updates=45500, lr=0.0002965, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=42001 end of epoch 27 (average epoch stats below) epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 epoch 027 | loss 3.482 | nll_loss 1.926 | ppl 3.8 | wps 527999 | ups 1.07 | wpb 495144 | bsz 16502.9 | num_updates 45533 | lr 0.000296392 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1508 | gb_free 23.2 | wall 42029 Start iterating over samples epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 67 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=552046, ups=1.13, wpb=490698, bsz=16692.4, num_updates=45600, lr=0.000296174, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=42090 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 167 / 1689 loss=3.463, nll_loss=1.903, ppl=3.74, wps=560088, ups=1.13, wpb=497612, bsz=16277.8, num_updates=45700, lr=0.00029585, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=42178 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 267 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554120, ups=1.12, wpb=494930, bsz=16453.3, num_updates=45800, lr=0.000295527, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=42268 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 367 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=557444, ups=1.13, wpb=494758, bsz=16576.9, num_updates=45900, lr=0.000295205, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=42356 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 epoch 028: 467 / 1689 loss=3.468, nll_loss=1.909, ppl=3.75, wps=553024, ups=1.11, wpb=496460, bsz=16696.2, num_updates=46000, lr=0.000294884, gnorm=0.167, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=42446 begin validation on "valid" subset epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.727 | nll_loss 2.185 | ppl 4.55 | wps 0 | wpb 44526 | bsz 2008 | num_updates 46000 | best_loss 3.702 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 567 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=481055, ups=0.97, wpb=494300, bsz=17089.4, num_updates=46100, lr=0.000294564, gnorm=0.166, clip=0, loss_scale=2, train_wall=89, gb_free=21.9, wall=42549 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 667 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=556379, ups=1.12, wpb=495755, bsz=16555.3, num_updates=46200, lr=0.000294245, gnorm=0.171, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=42638 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 767 / 1689 loss=3.472, nll_loss=1.913, ppl=3.77, wps=548739, ups=1.1, wpb=496659, bsz=16416.1, num_updates=46300, lr=0.000293927, gnorm=0.161, clip=0, loss_scale=2, train_wall=89, gb_free=21.4, wall=42729 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 868 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=543225, ups=1.09, wpb=496102, bsz=16509.3, num_updates=46400, lr=0.00029361, gnorm=0.167, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42820 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 968 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=550100, ups=1.11, wpb=495268, bsz=16357.7, num_updates=46500, lr=0.000293294, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=42910 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1068 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551910, ups=1.11, wpb=495896, bsz=16219.5, num_updates=46600, lr=0.000292979, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=43000 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1168 / 1689 loss=3.48, nll_loss=1.923, ppl=3.79, wps=555541, ups=1.12, wpb=496986, bsz=16275.6, num_updates=46700, lr=0.000292666, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=43089 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1268 / 1689 loss=3.484, nll_loss=1.928, ppl=3.8, wps=552076, ups=1.12, wpb=494704, bsz=16336.6, num_updates=46800, lr=0.000292353, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=43179 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1368 / 1689 loss=3.483, nll_loss=1.926, ppl=3.8, wps=553238, ups=1.11, wpb=496661, bsz=16595, num_updates=46900, lr=0.000292041, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=43269 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 epoch 028: 1469 / 1689 loss=3.489, nll_loss=1.933, ppl=3.82, wps=546591, ups=1.11, wpb=494642, bsz=16656.5, num_updates=47000, lr=0.00029173, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=43359 begin validation on "valid" subset epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028 | valid on 'valid' subset | loss 3.718 | nll_loss 2.176 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 47000 | best_loss 3.702 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1569 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=372529, ups=0.75, wpb=494292, bsz=16394.2, num_updates=47100, lr=0.00029142, gnorm=0.168, clip=0, loss_scale=1, train_wall=114, gb_free=21.7, wall=43492 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 epoch 028: 1669 / 1689 loss=3.493, nll_loss=1.938, ppl=3.83, wps=546368, ups=1.11, wpb=492716, bsz=16669.4, num_updates=47200, lr=0.000291111, gnorm=0.158, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=43582 end of epoch 28 (average epoch stats below) epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 epoch 028 | loss 3.478 | nll_loss 1.921 | ppl 3.79 | wps 531984 | ups 1.07 | wpb 495127 | bsz 16507.2 | num_updates 47220 | lr 0.00029105 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1514 | gb_free 22.6 | wall 43599 Start iterating over samples epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 81 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=540099, ups=1.1, wpb=491043, bsz=16469, num_updates=47300, lr=0.000290803, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.7, wall=43673 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 181 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=553465, ups=1.12, wpb=495351, bsz=16975.9, num_updates=47400, lr=0.000290496, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=43763 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 281 / 1689 loss=3.461, nll_loss=1.901, ppl=3.74, wps=550272, ups=1.11, wpb=494994, bsz=16294.3, num_updates=47500, lr=0.000290191, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=43852 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 381 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=553681, ups=1.12, wpb=494517, bsz=16001, num_updates=47600, lr=0.000289886, gnorm=0.17, clip=0, loss_scale=0.5, train_wall=87, gb_free=22.1, wall=43942 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 481 / 1689 loss=3.46, nll_loss=1.9, ppl=3.73, wps=554458, ups=1.12, wpb=495481, bsz=16729.5, num_updates=47700, lr=0.000289581, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.3, wall=44031 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 581 / 1689 loss=3.468, nll_loss=1.909, ppl=3.76, wps=551283, ups=1.11, wpb=494836, bsz=16884.7, num_updates=47800, lr=0.000289278, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=44121 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 681 / 1689 loss=3.472, nll_loss=1.914, ppl=3.77, wps=552885, ups=1.11, wpb=496844, bsz=16553.6, num_updates=47900, lr=0.000288976, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=44211 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 epoch 029: 781 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=552664, ups=1.11, wpb=496823, bsz=16413.9, num_updates=48000, lr=0.000288675, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20, wall=44301 begin validation on "valid" subset epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029 | valid on 'valid' subset | loss 3.713 | nll_loss 2.171 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 48000 | best_loss 3.702 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 881 / 1689 loss=3.481, nll_loss=1.924, ppl=3.8, wps=484921, ups=0.98, wpb=492493, bsz=16671.3, num_updates=48100, lr=0.000288375, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=44402 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 981 / 1689 loss=3.478, nll_loss=1.921, ppl=3.79, wps=551594, ups=1.12, wpb=493165, bsz=16486.2, num_updates=48200, lr=0.000288076, gnorm=0.174, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=44492 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1082 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=557557, ups=1.12, wpb=496219, bsz=16338.6, num_updates=48300, lr=0.000287777, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=44581 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1182 / 1689 loss=3.477, nll_loss=1.92, ppl=3.79, wps=562320, ups=1.13, wpb=496538, bsz=16762.9, num_updates=48400, lr=0.00028748, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.4, wall=44669 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1282 / 1689 loss=3.483, nll_loss=1.927, ppl=3.8, wps=563303, ups=1.14, wpb=496057, bsz=16542.7, num_updates=48500, lr=0.000287183, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.3, wall=44757 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1382 / 1689 loss=3.49, nll_loss=1.935, ppl=3.82, wps=565208, ups=1.14, wpb=496664, bsz=16528.2, num_updates=48600, lr=0.000286888, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=87, gb_free=21.7, wall=44845 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1482 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=554808, ups=1.12, wpb=494716, bsz=16411.1, num_updates=48700, lr=0.000286593, gnorm=0.172, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.6, wall=44934 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1582 / 1689 loss=3.484, nll_loss=1.927, ppl=3.8, wps=557083, ups=1.13, wpb=494015, bsz=16286.8, num_updates=48800, lr=0.000286299, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45023 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 epoch 029: 1682 / 1689 loss=3.485, nll_loss=1.929, ppl=3.81, wps=559557, ups=1.13, wpb=496510, bsz=16245.8, num_updates=48900, lr=0.000286006, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=22.2, wall=45111 end of epoch 29 (average epoch stats below) epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 epoch 029 | loss 3.474 | nll_loss 1.916 | ppl 3.77 | wps 550409 | ups 1.11 | wpb 495122 | bsz 16506 | num_updates 48907 | lr 0.000285986 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1483 | gb_free 25.8 | wall 45117 Start iterating over samples epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 epoch 030: 93 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=553233, ups=1.13, wpb=491302, bsz=16485.7, num_updates=49000, lr=0.000285714, gnorm=0.173, clip=0, loss_scale=1, train_wall=87, gb_free=21.6, wall=45200 begin validation on "valid" subset epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.718 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 49000 | best_loss 3.702 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 193 / 1689 loss=3.454, nll_loss=1.893, ppl=3.71, wps=485358, ups=0.98, wpb=494984, bsz=16227.8, num_updates=49100, lr=0.000285423, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=45302 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 293 / 1689 loss=3.459, nll_loss=1.899, ppl=3.73, wps=554408, ups=1.12, wpb=496139, bsz=16367.2, num_updates=49200, lr=0.000285133, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=45392 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 393 / 1689 loss=3.461, nll_loss=1.901, ppl=3.73, wps=550394, ups=1.11, wpb=494948, bsz=16342.9, num_updates=49300, lr=0.000284844, gnorm=0.158, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=45482 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 493 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=552640, ups=1.11, wpb=496319, bsz=16759, num_updates=49400, lr=0.000284555, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.7, wall=45572 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 594 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=554886, ups=1.12, wpb=495959, bsz=16536.2, num_updates=49500, lr=0.000284268, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=45661 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 694 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=560676, ups=1.13, wpb=495660, bsz=16464.6, num_updates=49600, lr=0.000283981, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=45749 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 794 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=561557, ups=1.13, wpb=496284, bsz=16229, num_updates=49700, lr=0.000283695, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.4, wall=45838 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 894 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=556433, ups=1.12, wpb=494749, bsz=16527.8, num_updates=49800, lr=0.00028341, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=45927 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 994 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=559538, ups=1.13, wpb=494784, bsz=16659.7, num_updates=49900, lr=0.000283126, gnorm=0.163, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46015 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 epoch 030: 1094 / 1689 loss=3.477, nll_loss=1.92, ppl=3.78, wps=553744, ups=1.12, wpb=494885, bsz=16518.9, num_updates=50000, lr=0.000282843, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=21.8, wall=46104 begin validation on "valid" subset epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030 | valid on 'valid' subset | loss 3.708 | nll_loss 2.164 | ppl 4.48 | wps 0 | wpb 44526 | bsz 2008 | num_updates 50000 | best_loss 3.702 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1195 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=351137, ups=0.71, wpb=494565, bsz=16881.8, num_updates=50100, lr=0.00028256, gnorm=0.168, clip=0, loss_scale=1, train_wall=95, gb_free=21.9, wall=46245 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1295 / 1689 loss=3.479, nll_loss=1.923, ppl=3.79, wps=558380, ups=1.13, wpb=494096, bsz=16370.1, num_updates=50200, lr=0.000282279, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46334 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1395 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=557387, ups=1.13, wpb=495453, bsz=16570.6, num_updates=50300, lr=0.000281998, gnorm=0.18, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=46423 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1495 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=558716, ups=1.13, wpb=495878, bsz=16742.1, num_updates=50400, lr=0.000281718, gnorm=0.159, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=46511 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 epoch 030: 1595 / 1689 loss=3.475, nll_loss=1.918, ppl=3.78, wps=562092, ups=1.13, wpb=496866, bsz=16518.6, num_updates=50500, lr=0.000281439, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=46600 end of epoch 30 (average epoch stats below) epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 epoch 030 | loss 3.47 | nll_loss 1.911 | ppl 3.76 | wps 533712 | ups 1.08 | wpb 495134 | bsz 16505.2 | num_updates 50594 | lr 0.000281177 | gnorm 0.167 | clip 0 | loss_scale 2 | train_wall 1491 | gb_free 23.8 | wall 46682 Start iterating over samples epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 6 / 1689 loss=3.481, nll_loss=1.925, ppl=3.8, wps=556067, ups=1.13, wpb=490414, bsz=16399.8, num_updates=50600, lr=0.000281161, gnorm=0.182, clip=0, loss_scale=2, train_wall=87, gb_free=21.9, wall=46688 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 107 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=552916, ups=1.12, wpb=494739, bsz=16405.5, num_updates=50700, lr=0.000280883, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=46777 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 207 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=549853, ups=1.11, wpb=495314, bsz=16575.1, num_updates=50800, lr=0.000280607, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=46868 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 307 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=559519, ups=1.13, wpb=495665, bsz=16289.1, num_updates=50900, lr=0.000280331, gnorm=0.165, clip=0, loss_scale=1, train_wall=87, gb_free=22.6, wall=46956 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 epoch 031: 407 / 1689 loss=3.464, nll_loss=1.904, ppl=3.74, wps=553124, ups=1.12, wpb=493461, bsz=16456.5, num_updates=51000, lr=0.000280056, gnorm=0.175, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=47045 begin validation on "valid" subset epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.721 | nll_loss 2.179 | ppl 4.53 | wps 0 | wpb 44526 | bsz 2008 | num_updates 51000 | best_loss 3.702 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 507 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=473824, ups=0.96, wpb=494497, bsz=16804, num_updates=51100, lr=0.000279782, gnorm=0.163, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=47150 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 607 / 1689 loss=3.457, nll_loss=1.897, ppl=3.73, wps=552807, ups=1.12, wpb=495382, bsz=16514.6, num_updates=51200, lr=0.000279508, gnorm=0.174, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=47239 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 708 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=540959, ups=1.09, wpb=496232, bsz=16617.8, num_updates=51300, lr=0.000279236, gnorm=0.175, clip=0, loss_scale=1, train_wall=90, gb_free=21.7, wall=47331 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 808 / 1689 loss=3.465, nll_loss=1.906, ppl=3.75, wps=546930, ups=1.1, wpb=496097, bsz=16340.4, num_updates=51400, lr=0.000278964, gnorm=0.161, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47422 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 908 / 1689 loss=3.47, nll_loss=1.911, ppl=3.76, wps=550504, ups=1.11, wpb=495869, bsz=16396.5, num_updates=51500, lr=0.000278693, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.2, wall=47512 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1008 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=552998, ups=1.12, wpb=495135, bsz=16805.8, num_updates=51600, lr=0.000278423, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.9, wall=47601 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1108 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=558661, ups=1.13, wpb=496180, bsz=15956.7, num_updates=51700, lr=0.000278154, gnorm=0.16, clip=0, loss_scale=1, train_wall=87, gb_free=21.7, wall=47690 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1208 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=551991, ups=1.12, wpb=493579, bsz=16719.8, num_updates=51800, lr=0.000277885, gnorm=0.165, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=47780 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1309 / 1689 loss=3.479, nll_loss=1.922, ppl=3.79, wps=546516, ups=1.1, wpb=495331, bsz=16236.4, num_updates=51900, lr=0.000277617, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=47870 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 epoch 031: 1409 / 1689 loss=3.473, nll_loss=1.915, ppl=3.77, wps=551227, ups=1.11, wpb=494728, bsz=16833, num_updates=52000, lr=0.00027735, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=22.4, wall=47960 begin validation on "valid" subset epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031 | valid on 'valid' subset | loss 3.726 | nll_loss 2.182 | ppl 4.54 | wps 0 | wpb 44526 | bsz 2008 | num_updates 52000 | best_loss 3.702 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1509 / 1689 loss=3.478, nll_loss=1.922, ppl=3.79, wps=377386, ups=0.76, wpb=497053, bsz=16848.7, num_updates=52100, lr=0.000277084, gnorm=0.169, clip=0, loss_scale=1, train_wall=113, gb_free=22, wall=48092 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 epoch 031: 1609 / 1689 loss=3.474, nll_loss=1.916, ppl=3.77, wps=552056, ups=1.12, wpb=494986, bsz=16441.3, num_updates=52200, lr=0.000276818, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=20.9, wall=48181 end of epoch 31 (average epoch stats below) epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 epoch 031 | loss 3.466 | nll_loss 1.907 | ppl 3.75 | wps 531694 | ups 1.07 | wpb 495119 | bsz 16505.9 | num_updates 52280 | lr 0.000276606 | gnorm 0.167 | clip 0 | loss_scale 1 | train_wall 1513 | gb_free 22.9 | wall 48252 Start iterating over samples epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 20 / 1689 loss=3.464, nll_loss=1.906, ppl=3.75, wps=547990, ups=1.11, wpb=493246, bsz=16484.4, num_updates=52300, lr=0.000276553, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=48271 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 120 / 1689 loss=3.447, nll_loss=1.885, ppl=3.69, wps=549026, ups=1.11, wpb=494752, bsz=16780.2, num_updates=52400, lr=0.000276289, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=48362 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 220 / 1689 loss=3.448, nll_loss=1.886, ppl=3.7, wps=546898, ups=1.11, wpb=494174, bsz=16501.8, num_updates=52500, lr=0.000276026, gnorm=0.162, clip=0, loss_scale=2, train_wall=89, gb_free=22.7, wall=48452 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 321 / 1689 loss=3.442, nll_loss=1.88, ppl=3.68, wps=544012, ups=1.1, wpb=496032, bsz=16598.1, num_updates=52600, lr=0.000275764, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.7, wall=48543 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 421 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=553634, ups=1.11, wpb=497387, bsz=16576.9, num_updates=52700, lr=0.000275502, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=48633 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 521 / 1689 loss=3.457, nll_loss=1.897, ppl=3.72, wps=546260, ups=1.11, wpb=494238, bsz=16820.4, num_updates=52800, lr=0.000275241, gnorm=0.164, clip=0, loss_scale=1, train_wall=89, gb_free=21.9, wall=48723 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 621 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=551598, ups=1.11, wpb=495871, bsz=16469.1, num_updates=52900, lr=0.000274981, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=48813 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 epoch 032: 721 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=549917, ups=1.11, wpb=494146, bsz=16560.2, num_updates=53000, lr=0.000274721, gnorm=0.166, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=48903 begin validation on "valid" subset epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032 | valid on 'valid' subset | loss 3.715 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 53000 | best_loss 3.702 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 822 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=485627, ups=0.98, wpb=494783, bsz=16069.4, num_updates=53100, lr=0.000274462, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=49005 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 922 / 1689 loss=3.466, nll_loss=1.907, ppl=3.75, wps=553699, ups=1.12, wpb=495925, bsz=16524.8, num_updates=53200, lr=0.000274204, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=49095 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1022 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=557258, ups=1.12, wpb=496291, bsz=16064.6, num_updates=53300, lr=0.000273947, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=21.9, wall=49184 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1122 / 1689 loss=3.47, nll_loss=1.912, ppl=3.76, wps=552178, ups=1.12, wpb=494796, bsz=16484.8, num_updates=53400, lr=0.00027369, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.5, wall=49273 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1222 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=554648, ups=1.12, wpb=494784, bsz=16258.3, num_updates=53500, lr=0.000273434, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=21.2, wall=49362 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1322 / 1689 loss=3.463, nll_loss=1.904, ppl=3.74, wps=554567, ups=1.12, wpb=495932, bsz=16110.8, num_updates=53600, lr=0.000273179, gnorm=0.164, clip=0, loss_scale=2, train_wall=88, gb_free=22.1, wall=49452 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1423 / 1689 loss=3.476, nll_loss=1.919, ppl=3.78, wps=540715, ups=1.09, wpb=494572, bsz=16812.3, num_updates=53700, lr=0.000272925, gnorm=0.174, clip=0, loss_scale=1, train_wall=90, gb_free=21.1, wall=49543 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1523 / 1689 loss=3.471, nll_loss=1.913, ppl=3.77, wps=552126, ups=1.11, wpb=496851, bsz=16677.4, num_updates=53800, lr=0.000272671, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=23.1, wall=49633 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 epoch 032: 1623 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=548953, ups=1.11, wpb=494392, bsz=16443, num_updates=53900, lr=0.000272418, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.5, wall=49723 end of epoch 32 (average epoch stats below) epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 epoch 032 | loss 3.462 | nll_loss 1.902 | ppl 3.74 | wps 545446 | ups 1.1 | wpb 495120 | bsz 16507.4 | num_updates 53966 | lr 0.000272251 | gnorm 0.166 | clip 0 | loss_scale 1 | train_wall 1489 | gb_free 22.8 | wall 49782 Start iterating over samples epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 epoch 033: 34 / 1689 loss=3.467, nll_loss=1.908, ppl=3.75, wps=540856, ups=1.1, wpb=491936, bsz=16573, num_updates=54000, lr=0.000272166, gnorm=0.162, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=49814 begin validation on "valid" subset epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.717 | nll_loss 2.168 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 54000 | best_loss 3.702 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 134 / 1689 loss=3.446, nll_loss=1.884, ppl=3.69, wps=413772, ups=0.84, wpb=495051, bsz=16581.6, num_updates=54100, lr=0.000271914, gnorm=0.171, clip=0, loss_scale=1, train_wall=87, gb_free=21.4, wall=49934 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 235 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=553993, ups=1.11, wpb=497011, bsz=16933.4, num_updates=54200, lr=0.000271663, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=21.3, wall=50024 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 335 / 1689 loss=3.443, nll_loss=1.881, ppl=3.68, wps=552466, ups=1.12, wpb=494001, bsz=16163.9, num_updates=54300, lr=0.000271413, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=21.8, wall=50113 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 435 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=558394, ups=1.13, wpb=494697, bsz=16487.4, num_updates=54400, lr=0.000271163, gnorm=0.158, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=50202 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 535 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=556548, ups=1.12, wpb=496269, bsz=16698.1, num_updates=54500, lr=0.000270914, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.2, wall=50291 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 635 / 1689 loss=3.451, nll_loss=1.89, ppl=3.71, wps=556859, ups=1.12, wpb=495416, bsz=16713, num_updates=54600, lr=0.000270666, gnorm=0.171, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=50380 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 735 / 1689 loss=3.458, nll_loss=1.898, ppl=3.73, wps=560178, ups=1.13, wpb=494131, bsz=16274.1, num_updates=54700, lr=0.000270418, gnorm=0.167, clip=0, loss_scale=2, train_wall=87, gb_free=21.8, wall=50468 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 835 / 1689 loss=3.464, nll_loss=1.905, ppl=3.74, wps=555592, ups=1.12, wpb=494142, bsz=16823.5, num_updates=54800, lr=0.000270172, gnorm=0.165, clip=0, loss_scale=2, train_wall=88, gb_free=22.5, wall=50557 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 935 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=558494, ups=1.13, wpb=496394, bsz=15964.5, num_updates=54900, lr=0.000269925, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22.7, wall=50646 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 epoch 033: 1035 / 1689 loss=3.459, nll_loss=1.9, ppl=3.73, wps=555808, ups=1.12, wpb=495264, bsz=16465.2, num_updates=55000, lr=0.00026968, gnorm=0.161, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=50735 begin validation on "valid" subset epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033 | valid on 'valid' subset | loss 3.716 | nll_loss 2.17 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 55000 | best_loss 3.702 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1136 / 1689 loss=3.469, nll_loss=1.91, ppl=3.76, wps=365881, ups=0.74, wpb=494328, bsz=16524.7, num_updates=55100, lr=0.000269435, gnorm=0.166, clip=0, loss_scale=1, train_wall=117, gb_free=22, wall=50870 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1236 / 1689 loss=3.474, nll_loss=1.917, ppl=3.78, wps=551861, ups=1.11, wpb=495742, bsz=17065.8, num_updates=55200, lr=0.000269191, gnorm=0.169, clip=0, loss_scale=1, train_wall=89, gb_free=21, wall=50960 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1336 / 1689 loss=3.468, nll_loss=1.91, ppl=3.76, wps=551646, ups=1.11, wpb=496130, bsz=16390, num_updates=55300, lr=0.000268947, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51050 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1436 / 1689 loss=3.473, nll_loss=1.916, ppl=3.77, wps=549702, ups=1.11, wpb=496398, bsz=16788.8, num_updates=55400, lr=0.000268705, gnorm=0.162, clip=0, loss_scale=1, train_wall=89, gb_free=22.4, wall=51140 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1536 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=552096, ups=1.11, wpb=495398, bsz=16522.5, num_updates=55500, lr=0.000268462, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.5, wall=51230 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 epoch 033: 1636 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=555062, ups=1.12, wpb=494940, bsz=16151.6, num_updates=55600, lr=0.000268221, gnorm=0.159, clip=0, loss_scale=2, train_wall=88, gb_free=22, wall=51319 end of epoch 33 (average epoch stats below) epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 epoch 033 | loss 3.458 | nll_loss 1.899 | ppl 3.73 | wps 527567 | ups 1.07 | wpb 495135 | bsz 16503.7 | num_updates 55653 | lr 0.000268093 | gnorm 0.165 | clip 0 | loss_scale 2 | train_wall 1514 | gb_free 24.1 | wall 51366 Start iterating over samples epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 48 / 1689 loss=3.455, nll_loss=1.895, ppl=3.72, wps=534559, ups=1.09, wpb=491635, bsz=16179, num_updates=55700, lr=0.00026798, gnorm=0.168, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=51411 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 148 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=554643, ups=1.12, wpb=496063, bsz=16964.2, num_updates=55800, lr=0.00026774, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=51501 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 248 / 1689 loss=3.44, nll_loss=1.877, ppl=3.67, wps=555143, ups=1.12, wpb=496274, bsz=16035.5, num_updates=55900, lr=0.0002675, gnorm=0.17, clip=0, loss_scale=1, train_wall=88, gb_free=22.2, wall=51590 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 epoch 034: 348 / 1689 loss=3.446, nll_loss=1.885, ppl=3.69, wps=552410, ups=1.12, wpb=495403, bsz=16299.9, num_updates=56000, lr=0.000267261, gnorm=0.155, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=51680 begin validation on "valid" subset epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.713 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 56000 | best_loss 3.702 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 448 / 1689 loss=3.45, nll_loss=1.889, ppl=3.7, wps=486002, ups=0.98, wpb=494527, bsz=16596.2, num_updates=56100, lr=0.000267023, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.6, wall=51781 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 548 / 1689 loss=3.448, nll_loss=1.887, ppl=3.7, wps=551440, ups=1.11, wpb=495985, bsz=16480.4, num_updates=56200, lr=0.000266785, gnorm=0.168, clip=0, loss_scale=2, train_wall=88, gb_free=21.3, wall=51871 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 648 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=554820, ups=1.12, wpb=494841, bsz=16652.2, num_updates=56300, lr=0.000266548, gnorm=0.176, clip=0, loss_scale=2, train_wall=88, gb_free=21.9, wall=51961 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 749 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=547155, ups=1.1, wpb=496248, bsz=16639.4, num_updates=56400, lr=0.000266312, gnorm=0.165, clip=0, loss_scale=1, train_wall=89, gb_free=22.3, wall=52051 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 849 / 1689 loss=3.453, nll_loss=1.893, ppl=3.71, wps=549467, ups=1.11, wpb=494612, bsz=16414.8, num_updates=56500, lr=0.000266076, gnorm=0.157, clip=0, loss_scale=1, train_wall=88, gb_free=21.3, wall=52141 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 949 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=546355, ups=1.11, wpb=493917, bsz=16682.5, num_updates=56600, lr=0.000265841, gnorm=0.166, clip=0, loss_scale=1, train_wall=89, gb_free=21.4, wall=52232 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1049 / 1689 loss=3.464, nll_loss=1.905, ppl=3.75, wps=557339, ups=1.13, wpb=495066, bsz=16115.5, num_updates=56700, lr=0.000265606, gnorm=0.168, clip=0, loss_scale=1, train_wall=87, gb_free=22.5, wall=52321 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1149 / 1689 loss=3.453, nll_loss=1.893, ppl=3.72, wps=553445, ups=1.12, wpb=495329, bsz=16469.5, num_updates=56800, lr=0.000265372, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52410 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1250 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=546868, ups=1.1, wpb=495139, bsz=16543, num_updates=56900, lr=0.000265139, gnorm=0.16, clip=0, loss_scale=1, train_wall=89, gb_free=22, wall=52501 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 epoch 034: 1350 / 1689 loss=3.461, nll_loss=1.902, ppl=3.74, wps=551682, ups=1.11, wpb=496626, bsz=16768.6, num_updates=57000, lr=0.000264906, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=52591 begin validation on "valid" subset epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034 | valid on 'valid' subset | loss 3.708 | nll_loss 2.167 | ppl 4.49 | wps 0 | wpb 44526 | bsz 2008 | num_updates 57000 | best_loss 3.702 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1450 / 1689 loss=3.469, nll_loss=1.911, ppl=3.76, wps=336373, ups=0.68, wpb=495824, bsz=16431.8, num_updates=57100, lr=0.000264674, gnorm=0.166, clip=0, loss_scale=1, train_wall=86, gb_free=22.3, wall=52738 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1550 / 1689 loss=3.463, nll_loss=1.905, ppl=3.74, wps=558320, ups=1.13, wpb=493771, bsz=16525.3, num_updates=57200, lr=0.000264443, gnorm=0.162, clip=0, loss_scale=1, train_wall=88, gb_free=21.8, wall=52826 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 epoch 034: 1650 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=560767, ups=1.13, wpb=495833, bsz=16381.2, num_updates=57300, lr=0.000264212, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=52915 end of epoch 34 (average epoch stats below) epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 epoch 034 | loss 3.455 | nll_loss 1.895 | ppl 3.72 | wps 527221 | ups 1.06 | wpb 495115 | bsz 16498.7 | num_updates 57339 | lr 0.000264122 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 1484 | gb_free 22.4 | wall 52949 Start iterating over samples epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 61 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=553520, ups=1.13, wpb=491928, bsz=16352.6, num_updates=57400, lr=0.000263982, gnorm=0.169, clip=0, loss_scale=1, train_wall=87, gb_free=21.8, wall=53004 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 161 / 1689 loss=3.434, nll_loss=1.87, ppl=3.66, wps=555654, ups=1.13, wpb=493902, bsz=16331.8, num_updates=57500, lr=0.000263752, gnorm=0.16, clip=0, loss_scale=2, train_wall=88, gb_free=21.8, wall=53093 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 262 / 1689 loss=3.441, nll_loss=1.879, ppl=3.68, wps=554001, ups=1.11, wpb=496871, bsz=16577, num_updates=57600, lr=0.000263523, gnorm=0.171, clip=0, loss_scale=1, train_wall=89, gb_free=21.6, wall=53182 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 363 / 1689 loss=3.436, nll_loss=1.874, ppl=3.67, wps=553975, ups=1.12, wpb=495853, bsz=16406.3, num_updates=57700, lr=0.000263295, gnorm=0.161, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.4, wall=53272 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 463 / 1689 loss=3.451, nll_loss=1.891, ppl=3.71, wps=555610, ups=1.12, wpb=494284, bsz=16535.2, num_updates=57800, lr=0.000263067, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.6, wall=53361 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 563 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=557820, ups=1.13, wpb=495462, bsz=16232.1, num_updates=57900, lr=0.00026284, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.3, wall=53450 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 epoch 035: 663 / 1689 loss=3.456, nll_loss=1.896, ppl=3.72, wps=555084, ups=1.12, wpb=494975, bsz=16341.4, num_updates=58000, lr=0.000262613, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.9, wall=53539 begin validation on "valid" subset epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.715 | nll_loss 2.169 | ppl 4.5 | wps 0 | wpb 44526 | bsz 2008 | num_updates 58000 | best_loss 3.702 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 763 / 1689 loss=3.447, nll_loss=1.887, ppl=3.7, wps=491279, ups=0.99, wpb=494917, bsz=16871.9, num_updates=58100, lr=0.000262387, gnorm=0.165, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=53640 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 863 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=555697, ups=1.12, wpb=496147, bsz=16512.9, num_updates=58200, lr=0.000262161, gnorm=0.169, clip=0, loss_scale=1, train_wall=88, gb_free=22, wall=53729 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 963 / 1689 loss=3.457, nll_loss=1.898, ppl=3.73, wps=552805, ups=1.12, wpb=495200, bsz=16657.9, num_updates=58300, lr=0.000261936, gnorm=0.17, clip=0, loss_scale=1, train_wall=89, gb_free=22.1, wall=53818 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1063 / 1689 loss=3.452, nll_loss=1.892, ppl=3.71, wps=563094, ups=1.14, wpb=496050, bsz=16380.5, num_updates=58400, lr=0.000261712, gnorm=0.164, clip=0, loss_scale=1, train_wall=88, gb_free=22.1, wall=53906 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1163 / 1689 loss=3.46, nll_loss=1.901, ppl=3.74, wps=558841, ups=1.13, wpb=496285, bsz=16386.2, num_updates=58500, lr=0.000261488, gnorm=0.167, clip=0, loss_scale=1, train_wall=88, gb_free=22.7, wall=53995 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1264 / 1689 loss=3.45, nll_loss=1.89, ppl=3.71, wps=543804, ups=1.1, wpb=494437, bsz=16297.3, num_updates=58600, lr=0.000261265, gnorm=0.169, clip=0, loss_scale=0.5, train_wall=90, gb_free=22.3, wall=54086 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1364 / 1689 loss=3.46, nll_loss=1.901, ppl=3.73, wps=547744, ups=1.11, wpb=493679, bsz=16415.4, num_updates=58700, lr=0.000261042, gnorm=0.162, clip=0, loss_scale=0.5, train_wall=89, gb_free=22.2, wall=54176 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1464 / 1689 loss=3.458, nll_loss=1.899, ppl=3.73, wps=549023, ups=1.11, wpb=496310, bsz=16611.4, num_updates=58800, lr=0.00026082, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=89, gb_free=22, wall=54267 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1564 / 1689 loss=3.462, nll_loss=1.903, ppl=3.74, wps=550131, ups=1.11, wpb=495288, bsz=16827.1, num_updates=58900, lr=0.000260599, gnorm=0.171, clip=0, loss_scale=0.5, train_wall=89, gb_free=21.2, wall=54357 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 epoch 035: 1664 / 1689 loss=3.466, nll_loss=1.908, ppl=3.75, wps=552314, ups=1.12, wpb=495055, bsz=16799, num_updates=59000, lr=0.000260378, gnorm=0.158, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.7, wall=54446 begin validation on "valid" subset epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 epoch 035 | valid on 'valid' subset | loss 3.717 | nll_loss 2.177 | ppl 4.52 | wps 0 | wpb 44526 | bsz 2008 | num_updates 59000 | best_loss 3.702 end of epoch 35 (average epoch stats below) epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 epoch 035 | loss 3.451 | nll_loss 1.891 | ppl 3.71 | wps 543806 | ups 1.1 | wpb 495120 | bsz 16502.9 | num_updates 59025 | lr 0.000260323 | gnorm 0.165 | clip 0 | loss_scale 0.5 | train_wall 1489 | gb_free 23 | wall 54484 Start iterating over samples epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 75 / 1689 loss=3.432, nll_loss=1.869, ppl=3.65, wps=466476, ups=0.95, wpb=491677, bsz=16792.4, num_updates=59100, lr=0.000260157, gnorm=0.166, clip=0, loss_scale=1, train_wall=87, gb_free=22.1, wall=54552 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 175 / 1689 loss=3.429, nll_loss=1.865, ppl=3.64, wps=552857, ups=1.12, wpb=493782, bsz=16280.7, num_updates=59200, lr=0.000259938, gnorm=0.173, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=54641 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 276 / 1689 loss=3.437, nll_loss=1.874, ppl=3.67, wps=551185, ups=1.11, wpb=494648, bsz=16223.8, num_updates=59300, lr=0.000259718, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54731 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 376 / 1689 loss=3.444, nll_loss=1.883, ppl=3.69, wps=554371, ups=1.12, wpb=496408, bsz=16745.4, num_updates=59400, lr=0.0002595, gnorm=0.168, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.2, wall=54820 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 476 / 1689 loss=3.434, nll_loss=1.872, ppl=3.66, wps=552545, ups=1.11, wpb=495985, bsz=16627.1, num_updates=59500, lr=0.000259281, gnorm=0.157, clip=0, loss_scale=0.5, train_wall=88, gb_free=22.1, wall=54910 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 576 / 1689 loss=3.445, nll_loss=1.884, ppl=3.69, wps=551700, ups=1.11, wpb=495190, bsz=16375, num_updates=59600, lr=0.000259064, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.5, wall=55000 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 676 / 1689 loss=3.447, nll_loss=1.886, ppl=3.7, wps=548931, ups=1.11, wpb=494026, bsz=16181, num_updates=59700, lr=0.000258847, gnorm=0.166, clip=0, loss_scale=0.5, train_wall=88, gb_free=21.8, wall=55090 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 776 / 1689 loss=3.454, nll_loss=1.893, ppl=3.72, wps=552145, ups=1.12, wpb=494093, bsz=16347.4, num_updates=59800, lr=0.00025863, gnorm=0.161, clip=0, loss_scale=1, train_wall=88, gb_free=21.6, wall=55179 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 876 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=549565, ups=1.11, wpb=494105, bsz=16375.2, num_updates=59900, lr=0.000258414, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=22.3, wall=55269 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 epoch 036: 976 / 1689 loss=3.449, nll_loss=1.888, ppl=3.7, wps=554695, ups=1.12, wpb=495296, bsz=16499.8, num_updates=60000, lr=0.000258199, gnorm=0.16, clip=0, loss_scale=1, train_wall=88, gb_free=21.7, wall=55359 Stopping training due to num_updates: 60000 >= max_update: 60000 begin validation on "valid" subset epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 epoch 036 | valid on 'valid' subset | loss 3.713 | nll_loss 2.173 | ppl 4.51 | wps 0 | wpb 44526 | bsz 2008 | num_updates 60000 | best_loss 3.702 end of epoch 36 (average epoch stats below) epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 epoch 036 | loss 3.442 | nll_loss 1.88 | ppl 3.68 | wps 537562 | ups 1.09 | wpb 494807 | bsz 16451.7 | num_updates 60000 | lr 0.000258199 | gnorm 0.165 | clip 0 | loss_scale 1 | train_wall 857 | gb_free 21.7 | wall 55382 done training in 55371.0 seconds