05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/tokenizer.model
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/tokenizer_config.json
05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at None
05/23/2024 11:11:51 - INFO - llmtuner.data.loader - Loading dataset ImTheFatedVillainChaptersDataset.json...
05/23/2024 11:11:51 - WARNING - llmtuner.data.utils - Checksum failed: missing SHA-1 hash value in dataset_info.json.
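
The checksum warning is benign: llmtuner only verifies the dataset file when a SHA-1 value is recorded for it in dataset_info.json. A minimal sketch of computing that hash (the `data/` path is an assumption; point it at wherever the dataset file actually lives):

```python
import hashlib

# Compute the SHA-1 of the dataset file so it can be recorded in
# dataset_info.json, which silences the checksum warning above.
# The path is an assumption; adjust to the real dataset location.
with open("data/ImTheFatedVillainChaptersDataset.json", "rb") as f:
    print(hashlib.sha1(f.read()).hexdigest())
```

The printed value goes into the dataset's entry in dataset_info.json (the key is `file_sha1` in LLaMA-Factory versions from this period).
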
05/23/2024 11:12:02 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
05/23/2024 11:12:02 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
05/23/2024 11:12:02 - INFO - transformers.configuration_utils - Model config ChatGLMConfig {
  "_name_or_path": "THUDM/chatglm3-6b-base",
  "add_bias_linear": false,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
    "ChatGLMModel"
  ],
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "auto_map": {
    "AutoConfig": "THUDM/chatglm3-6b-base--configuration_chatglm.ChatGLMConfig",
    "AutoModel": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForCausalLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSequenceClassification": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForSequenceClassification"
  },
  "bias_dropout_fusion": true,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "ffn_hidden_size": 13696,
  "fp32_residual_connection": false,
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "kv_channels": 128,
  "layernorm_epsilon": 1e-05,
  "model_type": "chatglm",
  "multi_query_attention": true,
  "multi_query_group_num": 2,
  "num_attention_heads": 32,
  "num_layers": 28,
  "original_rope": true,
  "pad_token_id": 0,
  "padded_vocab_size": 65024,
  "post_layer_norm": true,
  "pre_seq_len": null,
  "prefix_projection": false,
  "quantization_bit": 0,
  "rmsnorm": true,
  "seq_length": 32768,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.40.1",
  "use_cache": true,
  "vocab_size": 65024
}
05/23/2024 11:12:03 - INFO - transformers.modeling_utils - loading weights file pytorch_model.bin from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/pytorch_model.bin.index.json
05/23/2024 11:30:58 - INFO - transformers.modeling_utils - Instantiating ChatGLMForConditionalGeneration model under default dtype torch.float32.
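
Note the dtype here: the checkpoint's config declares `torch_dtype: float16`, but the model is instantiated under the default `torch.float32`, which roughly doubles memory use and is consistent with the ~19 minutes between the two timestamps above. A sketch of loading the same checkpoint directly with transformers while honoring the checkpoint dtype; the `auto_map` entries in the config mean `trust_remote_code=True` is required:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# ChatGLM3 ships its modeling code via auto_map (see the config above),
# so trust_remote_code=True is needed to resolve those classes.
tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/chatglm3-6b-base", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "THUDM/chatglm3-6b-base",
    trust_remote_code=True,
    torch_dtype="auto",  # honor the checkpoint's float16 instead of the fp32 default
)
```
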
05/23/2024 11:30:58 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
  "eos_token_id": 2,
  "pad_token_id": 0
}
05/23/2024 11:31:20 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing ChatGLMForConditionalGeneration.
05/23/2024 11:31:20 - INFO - transformers.modeling_utils - All the weights of ChatGLMForConditionalGeneration were initialized from the model checkpoint at THUDM/chatglm3-6b-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ChatGLMForConditionalGeneration for predictions without further training.
05/23/2024 11:31:21 - INFO - transformers.modeling_utils - Generation config file not found, using a generation config created from the model config.
05/23/2024 11:31:21 - WARNING - llmtuner.model.utils.checkpointing - You are using the old GC format, some features (e.g. BAdam) will be invalid.
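
The "old GC format" warning means the model's remote code still implements the legacy gradient-checkpointing hook, so newer keyword arguments cannot be forwarded to it. For comparison, this is what the newer-format call looks like on a model that supports it (gpt2 is just a stand-in, and `use_reentrant=False` is an illustrative choice, not something this log shows):

```python
from transformers import AutoModelForCausalLM

# Models on the new gradient-checkpointing format accept explicit kwargs;
# ChatGLM3's remote code predates this, hence the warning above.
model = AutoModelForCausalLM.from_pretrained("gpt2")
model.gradient_checkpointing_enable(
    gradient_checkpointing_kwargs={"use_reentrant": False}
)
```
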
05/23/2024 11:31:21 - INFO - llmtuner.model.utils.checkpointing - Gradient checkpointing enabled.
05/23/2024 11:31:21 - INFO - llmtuner.model.utils.attention - Using vanilla Attention implementation.
05/23/2024 11:31:21 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
05/23/2024 11:31:21 - INFO - llmtuner.model.loader - trainable params: 1949696 || all params: 6245533696 || trainable%: 0.0312
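
The trainable-parameter count is consistent with rank-8 LoRA adapters on each layer's fused query_key_value projection (the usual llmtuner target for ChatGLM); the rank and target are inferred from the numbers, not stated in the log:

```python
# Reconstructing "trainable params: 1949696" from the config above,
# assuming r=8 LoRA on the fused query_key_value projection only.
r = 8
hidden = 4096                     # hidden_size
qkv_out = 4096 + 2 * 2 * 128      # Q (32 heads x 128) + 2 KV groups x 128 x (K and V)
per_layer = r * hidden + r * qkv_out  # lora_A (r x in) + lora_B (out x r)
total = 28 * per_layer            # num_layers = 28
print(total)                      # 1949696
print(f"{100 * total / 6_245_533_696:.4f}")  # 0.0312, the reported trainable%
```
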
05/23/2024 11:31:21 - INFO - transformers.trainer - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
05/23/2024 11:31:21 - INFO - transformers.trainer - ***** Running training *****
05/23/2024 11:31:21 - INFO - transformers.trainer - Num examples = 741
05/23/2024 11:31:21 - INFO - transformers.trainer - Num Epochs = 1
05/23/2024 11:31:21 - INFO - transformers.trainer - Instantaneous batch size per device = 2
05/23/2024 11:31:21 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16
05/23/2024 11:31:21 - INFO - transformers.trainer - Gradient Accumulation steps = 8
05/23/2024 11:31:21 - INFO - transformers.trainer - Total optimization steps = 46
05/23/2024 11:31:21 - INFO - transformers.trainer - Number of trainable parameters = 1,949,696
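
These numbers fit together: per-device batch 2 times gradient accumulation 8 gives the total batch of 16, implying a single training process, and one epoch over 741 examples yields the 46 optimizer steps:

```python
import math

# Reconstructing "Total optimization steps = 46" from the run arguments.
num_examples, micro_batch, accum = 741, 2, 8
dataloader_steps = math.ceil(num_examples / micro_batch)  # 371 forward passes
optim_steps = dataloader_steps // accum                   # 46 optimizer updates
print(dataloader_steps, optim_steps)
```
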
05/23/2024 12:06:13 - INFO - llmtuner.extras.callbacks - {'loss': 1.8053, 'learning_rate': 1.9423e-05, 'epoch': 0.11}
05/23/2024 12:45:11 - INFO - llmtuner.extras.callbacks - {'loss': 1.7973, 'learning_rate': 1.7757e-05, 'epoch': 0.22}
05/23/2024 13:25:31 - INFO - llmtuner.extras.callbacks - {'loss': 1.7813, 'learning_rate': 1.5196e-05, 'epoch': 0.32}
05/23/2024 14:05:34 - INFO - llmtuner.extras.callbacks - {'loss': 1.8348, 'learning_rate': 1.2035e-05, 'epoch': 0.43}
05/23/2024 14:45:14 - INFO - llmtuner.extras.callbacks - {'loss': 1.7943, 'learning_rate': 8.6383e-06, 'epoch': 0.54}
05/23/2024 15:22:48 - INFO - llmtuner.extras.callbacks - {'loss': 42.0508, 'learning_rate': 5.3993e-06, 'epoch': 0.65}
05/23/2024 15:57:14 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 2.6916e-06, 'epoch': 0.75}
05/23/2024 16:29:15 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 8.2789e-07, 'epoch': 0.86}
05/23/2024 17:03:20 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 2.3312e-08, 'epoch': 0.97}
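
The loss trace is worth flagging: it sits near 1.8 for five logging steps, spikes to 42.0508, then reads exactly 0.0000 for the rest of the run. That pattern is commonly a sign of numerical divergence (an overflow or NaN somewhere in the step), after which the reported loss is no longer meaningful. Whatever the cause here, a generic guard one can add to a custom PyTorch loop to fail fast on the first non-finite loss (a sketch, not part of llmtuner):

```python
import torch

def assert_finite_loss(loss: torch.Tensor, step: int) -> None:
    # Stop immediately on NaN/inf instead of letting the run continue
    # and log degenerate values, as appears to happen mid-run above.
    if not torch.isfinite(loss).all():
        raise RuntimeError(f"Non-finite loss {loss.item()} at step {step}")
```
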
05/23/2024 17:10:13 - INFO - transformers.trainer -

Training completed. Do not forget to share your model on huggingface.co/models =)

05/23/2024 17:10:14 - INFO - transformers.trainer - Saving model checkpoint to saves/ChatGLM3-6B-Base/lora/test1
05/23/2024 17:10:15 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
05/23/2024 17:10:15 - INFO - transformers.configuration_utils - Model config ChatGLMConfig {
  [identical ChatGLMConfig dump as above, omitted]
}
05/23/2024 17:10:15 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/ChatGLM3-6B-Base/lora/test1/tokenizer_config.json
05/23/2024 17:10:15 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/ChatGLM3-6B-Base/lora/test1/special_tokens_map.json
05/23/2024 17:10:15 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
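
Once saved, the adapter in saves/ChatGLM3-6B-Base/lora/test1 can be attached back onto the base model with peft; a sketch assuming a standard peft-format LoRA checkpoint, which is what llmtuner writes:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Attach the LoRA adapter saved above to the frozen base model.
base = AutoModelForCausalLM.from_pretrained(
    "THUDM/chatglm3-6b-base", trust_remote_code=True, torch_dtype="auto"
)
model = PeftModel.from_pretrained(base, "saves/ChatGLM3-6B-Base/lora/test1")
model = model.merge_and_unload()  # optional: fold adapter weights into the base
```
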
|