05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/tokenizer.model
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/tokenizer_config.json
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at None
05/23/2024 11:11:51 - INFO - llmtuner.data.loader - Loading dataset ImTheFatedVillainChaptersDataset.json...
05/23/2024 11:11:51 - WARNING - llmtuner.data.utils - Checksum failed: missing SHA-1 hash value in dataset_info.json.
05/23/2024 11:12:02 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
05/23/2024 11:12:02 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
05/23/2024 11:12:02 - INFO - transformers.configuration_utils - Model config ChatGLMConfig {
  "_name_or_path": "THUDM/chatglm3-6b-base",
  "add_bias_linear": false,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
    "ChatGLMModel"
  ],
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "auto_map": {
    "AutoConfig": "THUDM/chatglm3-6b-base--configuration_chatglm.ChatGLMConfig",
    "AutoModel": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForCausalLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSequenceClassification": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForSequenceClassification"
  },
  "bias_dropout_fusion": true,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "ffn_hidden_size": 13696,
  "fp32_residual_connection": false,
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "kv_channels": 128,
  "layernorm_epsilon": 1e-05,
  "model_type": "chatglm",
  "multi_query_attention": true,
  "multi_query_group_num": 2,
  "num_attention_heads": 32,
  "num_layers": 28,
  "original_rope": true,
  "pad_token_id": 0,
  "padded_vocab_size": 65024,
  "post_layer_norm": true,
  "pre_seq_len": null,
  "prefix_projection": false,
  "quantization_bit": 0,
  "rmsnorm": true,
  "seq_length": 32768,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.40.1",
  "use_cache": true,
  "vocab_size": 65024
}

05/23/2024 11:12:03 - INFO - transformers.modeling_utils - loading weights file pytorch_model.bin from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/pytorch_model.bin.index.json
05/23/2024 11:30:58 - INFO - transformers.modeling_utils - Instantiating ChatGLMForConditionalGeneration model under default dtype torch.float32.
05/23/2024 11:30:58 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
  "eos_token_id": 2,
  "pad_token_id": 0
}

05/23/2024 11:31:20 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing ChatGLMForConditionalGeneration.
05/23/2024 11:31:20 - INFO - transformers.modeling_utils - All the weights of ChatGLMForConditionalGeneration were initialized from the model checkpoint at THUDM/chatglm3-6b-base. If your task is similar to the task the model of the checkpoint was trained on, you can already use ChatGLMForConditionalGeneration for predictions without further training.
05/23/2024 11:31:21 - INFO - transformers.modeling_utils - Generation config file not found, using a generation config created from the model config.
05/23/2024 11:31:21 - WARNING - llmtuner.model.utils.checkpointing - You are using the old GC format, some features (e.g. BAdam) will be invalid.
05/23/2024 11:31:21 - INFO - llmtuner.model.utils.checkpointing - Gradient checkpointing enabled.
05/23/2024 11:31:21 - INFO - llmtuner.model.utils.attention - Using vanilla Attention implementation.
05/23/2024 11:31:21 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
05/23/2024 11:31:21 - INFO - llmtuner.model.loader - trainable params: 1949696 || all params: 6245533696 || trainable%: 0.0312
05/23/2024 11:31:21 - INFO - transformers.trainer - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
05/23/2024 11:31:21 - INFO - transformers.trainer - ***** Running training *****
05/23/2024 11:31:21 - INFO - transformers.trainer -   Num examples = 741
05/23/2024 11:31:21 - INFO - transformers.trainer -   Num Epochs = 1
05/23/2024 11:31:21 - INFO - transformers.trainer -   Instantaneous batch size per device = 2
05/23/2024 11:31:21 - INFO - transformers.trainer -   Total train batch size (w. parallel, distributed & accumulation) = 16
05/23/2024 11:31:21 - INFO - transformers.trainer -   Gradient Accumulation steps = 8
05/23/2024 11:31:21 - INFO - transformers.trainer -   Total optimization steps = 46
05/23/2024 11:31:21 - INFO - transformers.trainer -   Number of trainable parameters = 1,949,696
05/23/2024 12:06:13 - INFO - llmtuner.extras.callbacks - {'loss': 1.8053, 'learning_rate': 1.9423e-05, 'epoch': 0.11}
05/23/2024 12:45:11 - INFO - llmtuner.extras.callbacks - {'loss': 1.7973, 'learning_rate': 1.7757e-05, 'epoch': 0.22}
05/23/2024 13:25:31 - INFO - llmtuner.extras.callbacks - {'loss': 1.7813, 'learning_rate': 1.5196e-05, 'epoch': 0.32}
05/23/2024 14:05:34 - INFO - llmtuner.extras.callbacks - {'loss': 1.8348, 'learning_rate': 1.2035e-05, 'epoch': 0.43}
05/23/2024 14:45:14 - INFO - llmtuner.extras.callbacks - {'loss': 1.7943, 'learning_rate': 8.6383e-06, 'epoch': 0.54}
05/23/2024 15:22:48 - INFO - llmtuner.extras.callbacks - {'loss': 42.0508, 'learning_rate': 5.3993e-06, 'epoch': 0.65}
05/23/2024 15:57:14 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 2.6916e-06, 'epoch': 0.75}
05/23/2024 16:29:15 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 8.2789e-07, 'epoch': 0.86}
05/23/2024 17:03:20 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 2.3312e-08, 'epoch': 0.97}
05/23/2024 17:10:13 - INFO - transformers.trainer - Training completed.
Do not forget to share your model on huggingface.co/models =)

05/23/2024 17:10:14 - INFO - transformers.trainer - Saving model checkpoint to saves/ChatGLM3-6B-Base/lora/test1
05/23/2024 17:10:15 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
05/23/2024 17:10:15 - INFO - transformers.configuration_utils - Model config ChatGLMConfig {
  "_name_or_path": "THUDM/chatglm3-6b-base",
  "add_bias_linear": false,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
    "ChatGLMModel"
  ],
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "auto_map": {
    "AutoConfig": "THUDM/chatglm3-6b-base--configuration_chatglm.ChatGLMConfig",
    "AutoModel": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForCausalLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSequenceClassification": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForSequenceClassification"
  },
  "bias_dropout_fusion": true,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "ffn_hidden_size": 13696,
  "fp32_residual_connection": false,
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "kv_channels": 128,
  "layernorm_epsilon": 1e-05,
  "model_type": "chatglm",
  "multi_query_attention": true,
  "multi_query_group_num": 2,
  "num_attention_heads": 32,
  "num_layers": 28,
  "original_rope": true,
  "pad_token_id": 0,
  "padded_vocab_size": 65024,
  "post_layer_norm": true,
  "pre_seq_len": null,
  "prefix_projection": false,
  "quantization_bit": 0,
  "rmsnorm": true,
  "seq_length": 32768,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.40.1",
  "use_cache": true,
  "vocab_size": 65024
}

05/23/2024 17:10:15 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/ChatGLM3-6B-Base/lora/test1/tokenizer_config.json
05/23/2024 17:10:15 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/ChatGLM3-6B-Base/lora/test1/special_tokens_map.json
05/23/2024 17:10:15 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields: {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
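
For reference, the headline numbers in the trainer summary above are mutually consistent. The sketch below is a minimal, plain-Python reading of that arithmetic; it assumes a single effective device (so 2 x 8 = 16) and that partial gradient-accumulation windows are dropped, which is one plausible way to land on 46 steps, not necessarily the exact formula the Trainer uses.

```python
import math

# Values copied from the trainer log above.
num_examples = 741
per_device_batch_size = 2
grad_accum_steps = 8
num_devices = 1  # assumption: one effective device, since 2 * 8 * 1 = 16

total_batch_size = per_device_batch_size * grad_accum_steps * num_devices
print(total_batch_size)  # 16 -> matches "Total train batch size = 16"

# Optimization steps for one epoch: micro-batches per epoch, floor-divided
# by the accumulation steps (incomplete accumulation windows discarded).
micro_batches = math.ceil(num_examples / per_device_batch_size)  # 371
optimization_steps = micro_batches // grad_accum_steps           # 46 -> matches the log
print(optimization_steps)

# LoRA trainable-parameter ratio reported by llmtuner.model.loader.
trainable, total = 1_949_696, 6_245_533_696
print(round(100 * trainable / total, 4))  # ~0.0312 %
```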
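The checkpoint written to saves/ChatGLM3-6B-Base/lora/test1 contains only the LoRA adapter, not the full model. A minimal sketch of how such an adapter is typically loaded back for inference with `peft`, assuming the adapter directory from the log and that `trust_remote_code=True` is acceptable (ChatGLM3 ships custom modeling code); the prompt string is purely illustrative.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "THUDM/chatglm3-6b-base"
adapter_dir = "saves/ChatGLM3-6B-Base/lora/test1"  # path taken from the log above

tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # the checkpoint's native dtype per the config above
)

# Attach the LoRA adapter produced by the training run; calling
# model.merge_and_unload() afterwards would fold the adapter into the base
# weights if a standalone model is needed.
model = PeftModel.from_pretrained(base_model, adapter_dir)
model.eval()

inputs = tokenizer("Once upon a time", return_tensors="pt")  # illustrative prompt
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```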