AICube committed
Commit 5083c9c
1 Parent(s): 9accecc

Upload 14 files

README.md ADDED
@@ -0,0 +1,58 @@
+ ---
+ license: other
+ library_name: peft
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ base_model: THUDM/chatglm3-6b-base
+ model-index:
+ - name: test1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # test1
+
+ This model is a fine-tuned version of [THUDM/chatglm3-6b-base](https://huggingface.co/THUDM/chatglm3-6b-base) on the im_the_fated_villain_chapters dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2e-05
+ - train_batch_size: 2
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - num_epochs: 1.0
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - PEFT 0.10.0
+ - Transformers 4.40.1
+ - Pytorch 2.2.2
+ - Datasets 2.19.0
+ - Tokenizers 0.19.1
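
For reference, a minimal sketch of how this LoRA adapter might be loaded on top of the base model with PEFT. The adapter repo id `AICube/test1` is an assumption (taken from the model-index name); substitute the actual repo id or a local path.

```python
# Hypothetical usage sketch: load THUDM/chatglm3-6b-base and attach this LoRA adapter.
# "AICube/test1" is an assumed repo id, not confirmed by the upload itself.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "THUDM/chatglm3-6b-base"
adapter_id = "AICube/test1"  # assumed repo id / local path of this upload

tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(base_id, trust_remote_code=True)
model = PeftModel.from_pretrained(model, adapter_id)  # applies the LoRA weights
model.eval()

inputs = tokenizer("Once upon a time", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```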
adapter_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "THUDM/chatglm3-6b-base",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "query_key_value"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
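
As a sanity check on this config, the trainable-parameter count reported in running_log.txt (1,949,696) can be reproduced from the LoRA rank above and the ChatGLM3 dimensions printed in the log. This is an illustrative calculation, not part of the upload.

```python
# Reproduce "trainable params: 1949696" from running_log.txt.
# Dimensions come from the ChatGLMConfig in the log; the formula is the standard
# LoRA parameter count r * (in_features + out_features) per adapted linear layer.
hidden_size = 4096
kv_channels = 128
multi_query_group_num = 2
num_layers = 28
r = 8  # "r" in adapter_config.json / lora_rank in trainer_config.yaml

qkv_out = hidden_size + 2 * multi_query_group_num * kv_channels  # fused query_key_value output dim
per_layer = r * (hidden_size + qkv_out)                          # lora_A + lora_B for one layer
total = per_layer * num_layers
print(total)  # 1949696, matching the log
```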
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a254467229cc39818fc5091b8bc6b37f88a4ddd915a54c7b7a8c2a7a7d1aa87
+ size 7807744
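
This is a Git LFS pointer; the actual adapter weights are the ~7.8 MB safetensors payload. A small sketch for inspecting the downloaded file (the local path is an assumption):

```python
# List the LoRA tensors stored in adapter_model.safetensors.
# "adapter_model.safetensors" is assumed to be the downloaded file in the current directory.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt") as f:
    for name in f.keys():
        tensor = f.get_tensor(name)
        print(name, tuple(tensor.shape), tensor.dtype)
```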
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 0.9919137466307277,
+ "total_flos": 2.703782589549773e+16,
+ "train_loss": 5.550413712211277,
+ "train_runtime": 20332.1955,
+ "train_samples_per_second": 0.036,
+ "train_steps_per_second": 0.002
+ }
running_log.txt ADDED
@@ -0,0 +1,197 @@
+ 05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/tokenizer.model
+
+ 05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
+
+ 05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
+
+ 05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/tokenizer_config.json
+
+ 05/23/2024 11:11:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at None
+
+ 05/23/2024 11:11:51 - INFO - llmtuner.data.loader - Loading dataset ImTheFatedVillainChaptersDataset.json...
+
+ 05/23/2024 11:11:51 - WARNING - llmtuner.data.utils - Checksum failed: missing SHA-1 hash value in dataset_info.json.
+
+ 05/23/2024 11:12:02 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
+
+ 05/23/2024 11:12:02 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
+
+ 05/23/2024 11:12:02 - INFO - transformers.configuration_utils - Model config ChatGLMConfig {
+ "_name_or_path": "THUDM/chatglm3-6b-base",
+ "add_bias_linear": false,
+ "add_qkv_bias": true,
+ "apply_query_key_layer_scaling": true,
+ "apply_residual_connection_post_layernorm": false,
+ "architectures": [
+ "ChatGLMModel"
+ ],
+ "attention_dropout": 0.0,
+ "attention_softmax_in_fp32": true,
+ "auto_map": {
+ "AutoConfig": "THUDM/chatglm3-6b-base--configuration_chatglm.ChatGLMConfig",
+ "AutoModel": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
+ "AutoModelForCausalLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
+ "AutoModelForSeq2SeqLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
+ "AutoModelForSequenceClassification": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForSequenceClassification"
+ },
+ "bias_dropout_fusion": true,
+ "classifier_dropout": null,
+ "eos_token_id": 2,
+ "ffn_hidden_size": 13696,
+ "fp32_residual_connection": false,
+ "hidden_dropout": 0.0,
+ "hidden_size": 4096,
+ "kv_channels": 128,
+ "layernorm_epsilon": 1e-05,
+ "model_type": "chatglm",
+ "multi_query_attention": true,
+ "multi_query_group_num": 2,
+ "num_attention_heads": 32,
+ "num_layers": 28,
+ "original_rope": true,
+ "pad_token_id": 0,
+ "padded_vocab_size": 65024,
+ "post_layer_norm": true,
+ "pre_seq_len": null,
+ "prefix_projection": false,
+ "quantization_bit": 0,
+ "rmsnorm": true,
+ "seq_length": 32768,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.40.1",
+ "use_cache": true,
+ "vocab_size": 65024
+ }
+
+
+ 05/23/2024 11:12:03 - INFO - transformers.modeling_utils - loading weights file pytorch_model.bin from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/pytorch_model.bin.index.json
+
+ 05/23/2024 11:30:58 - INFO - transformers.modeling_utils - Instantiating ChatGLMForConditionalGeneration model under default dtype torch.float32.
+
+ 05/23/2024 11:30:58 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
+ "eos_token_id": 2,
+ "pad_token_id": 0
+ }
+
+
+ 05/23/2024 11:31:20 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing ChatGLMForConditionalGeneration.
+
+
+ 05/23/2024 11:31:20 - INFO - transformers.modeling_utils - All the weights of ChatGLMForConditionalGeneration were initialized from the model checkpoint at THUDM/chatglm3-6b-base.
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use ChatGLMForConditionalGeneration for predictions without further training.
+
+ 05/23/2024 11:31:21 - INFO - transformers.modeling_utils - Generation config file not found, using a generation config created from the model config.
+
+ 05/23/2024 11:31:21 - WARNING - llmtuner.model.utils.checkpointing - You are using the old GC format, some features (e.g. BAdam) will be invalid.
+
+ 05/23/2024 11:31:21 - INFO - llmtuner.model.utils.checkpointing - Gradient checkpointing enabled.
+
+ 05/23/2024 11:31:21 - INFO - llmtuner.model.utils.attention - Using vanilla Attention implementation.
+
+ 05/23/2024 11:31:21 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
+
+ 05/23/2024 11:31:21 - INFO - llmtuner.model.loader - trainable params: 1949696 || all params: 6245533696 || trainable%: 0.0312
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - ***** Running training *****
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Num examples = 741
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Num Epochs = 1
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Instantaneous batch size per device = 2
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Gradient Accumulation steps = 8
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Total optimization steps = 46
+
+ 05/23/2024 11:31:21 - INFO - transformers.trainer - Number of trainable parameters = 1,949,696
+
+ 05/23/2024 12:06:13 - INFO - llmtuner.extras.callbacks - {'loss': 1.8053, 'learning_rate': 1.9423e-05, 'epoch': 0.11}
+
+ 05/23/2024 12:45:11 - INFO - llmtuner.extras.callbacks - {'loss': 1.7973, 'learning_rate': 1.7757e-05, 'epoch': 0.22}
+
+ 05/23/2024 13:25:31 - INFO - llmtuner.extras.callbacks - {'loss': 1.7813, 'learning_rate': 1.5196e-05, 'epoch': 0.32}
+
+ 05/23/2024 14:05:34 - INFO - llmtuner.extras.callbacks - {'loss': 1.8348, 'learning_rate': 1.2035e-05, 'epoch': 0.43}
+
+ 05/23/2024 14:45:14 - INFO - llmtuner.extras.callbacks - {'loss': 1.7943, 'learning_rate': 8.6383e-06, 'epoch': 0.54}
+
+ 05/23/2024 15:22:48 - INFO - llmtuner.extras.callbacks - {'loss': 42.0508, 'learning_rate': 5.3993e-06, 'epoch': 0.65}
+
+ 05/23/2024 15:57:14 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 2.6916e-06, 'epoch': 0.75}
+
+ 05/23/2024 16:29:15 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 8.2789e-07, 'epoch': 0.86}
+
+ 05/23/2024 17:03:20 - INFO - llmtuner.extras.callbacks - {'loss': 0.0000, 'learning_rate': 2.3312e-08, 'epoch': 0.97}
+
+ 05/23/2024 17:10:13 - INFO - transformers.trainer -
+
+ Training completed. Do not forget to share your model on huggingface.co/models =)
+
+
+
+ 05/23/2024 17:10:14 - INFO - transformers.trainer - Saving model checkpoint to saves/ChatGLM3-6B-Base/lora/test1
+
+ 05/23/2024 17:10:15 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/hanyiye/.cache/huggingface/hub/models--THUDM--chatglm3-6b-base/snapshots/f91a1de587fdc692073367198e65369669a0b49d/config.json
+
+ 05/23/2024 17:10:15 - INFO - transformers.configuration_utils - Model config ChatGLMConfig {
+ "_name_or_path": "THUDM/chatglm3-6b-base",
+ "add_bias_linear": false,
+ "add_qkv_bias": true,
+ "apply_query_key_layer_scaling": true,
+ "apply_residual_connection_post_layernorm": false,
+ "architectures": [
+ "ChatGLMModel"
+ ],
+ "attention_dropout": 0.0,
+ "attention_softmax_in_fp32": true,
+ "auto_map": {
+ "AutoConfig": "THUDM/chatglm3-6b-base--configuration_chatglm.ChatGLMConfig",
+ "AutoModel": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
+ "AutoModelForCausalLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
+ "AutoModelForSeq2SeqLM": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForConditionalGeneration",
+ "AutoModelForSequenceClassification": "THUDM/chatglm3-6b-base--modeling_chatglm.ChatGLMForSequenceClassification"
+ },
+ "bias_dropout_fusion": true,
+ "classifier_dropout": null,
+ "eos_token_id": 2,
+ "ffn_hidden_size": 13696,
+ "fp32_residual_connection": false,
+ "hidden_dropout": 0.0,
+ "hidden_size": 4096,
+ "kv_channels": 128,
+ "layernorm_epsilon": 1e-05,
+ "model_type": "chatglm",
+ "multi_query_attention": true,
+ "multi_query_group_num": 2,
+ "num_attention_heads": 32,
+ "num_layers": 28,
+ "original_rope": true,
+ "pad_token_id": 0,
+ "padded_vocab_size": 65024,
+ "post_layer_norm": true,
+ "pre_seq_len": null,
+ "prefix_projection": false,
+ "quantization_bit": 0,
+ "rmsnorm": true,
+ "seq_length": 32768,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.40.1",
+ "use_cache": true,
+ "vocab_size": 65024
+ }
+
+
+ 05/23/2024 17:10:15 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/ChatGLM3-6B-Base/lora/test1/tokenizer_config.json
+
+ 05/23/2024 17:10:15 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/ChatGLM3-6B-Base/lora/test1/special_tokens_map.json
+
+ 05/23/2024 17:10:15 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
+ {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
+
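
The "Checksum failed" warning near the top of this log comes from a missing hash in the dataset registry. A hedged sketch for computing it, assuming LLaMA-Factory's `dataset_info.json` accepts a `file_sha1` field and that the dataset file sits under `data/` (both the field name and the path are assumptions):

```python
# Compute the SHA-1 of the dataset file so it can be recorded in dataset_info.json
# and silence the "Checksum failed" warning. The "file_sha1" field name and the
# path below are assumptions based on the llmtuner log above.
import hashlib

def sha1_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha1()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

print(sha1_of("data/ImTheFatedVillainChaptersDataset.json"))
```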
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {}
tokenization_chatglm.py ADDED
@@ -0,0 +1,300 @@
+ import json
+ import os
+ import re
+ from typing import List, Optional, Union, Dict
+ from sentencepiece import SentencePieceProcessor
+ from transformers import PreTrainedTokenizer
+ from transformers.utils import logging, PaddingStrategy
+ from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
+
+
+ class SPTokenizer:
+     def __init__(self, model_path: str):
+         # reload tokenizer
+         assert os.path.isfile(model_path), model_path
+         self.sp_model = SentencePieceProcessor(model_file=model_path)
+
+         # BOS / EOS token IDs
+         self.n_words: int = self.sp_model.vocab_size()
+         self.bos_id: int = self.sp_model.bos_id()
+         self.eos_id: int = self.sp_model.eos_id()
+         self.pad_id: int = self.sp_model.unk_id()
+         assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
+
+         role_special_tokens = ["<|system|>", "<|user|>", "<|assistant|>", "<|observation|>"]
+         special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"] + role_special_tokens
+         self.special_tokens = {}
+         self.index_special_tokens = {}
+         for token in special_tokens:
+             self.special_tokens[token] = self.n_words
+             self.index_special_tokens[self.n_words] = token
+             self.n_words += 1
+         self.role_special_token_expression = "|".join([re.escape(token) for token in role_special_tokens])
+
+     def tokenize(self, s: str, encode_special_tokens=False):
+         if encode_special_tokens:
+             last_index = 0
+             t = []
+             for match in re.finditer(self.role_special_token_expression, s):
+                 if last_index < match.start():
+                     t.extend(self.sp_model.EncodeAsPieces(s[last_index:match.start()]))
+                 t.append(s[match.start():match.end()])
+                 last_index = match.end()
+             if last_index < len(s):
+                 t.extend(self.sp_model.EncodeAsPieces(s[last_index:]))
+             return t
+         else:
+             return self.sp_model.EncodeAsPieces(s)
+
+     def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
+         assert type(s) is str
+         t = self.sp_model.encode(s)
+         if bos:
+             t = [self.bos_id] + t
+         if eos:
+             t = t + [self.eos_id]
+         return t
+
+     def decode(self, t: List[int]) -> str:
+         text, buffer = "", []
+         for token in t:
+             if token in self.index_special_tokens:
+                 if buffer:
+                     text += self.sp_model.decode(buffer)
+                     buffer = []
+                 text += self.index_special_tokens[token]
+             else:
+                 buffer.append(token)
+         if buffer:
+             text += self.sp_model.decode(buffer)
+         return text
+
+     def decode_tokens(self, tokens: List[str]) -> str:
+         text = self.sp_model.DecodePieces(tokens)
+         return text
+
+     def convert_token_to_id(self, token):
+         """ Converts a token (str) in an id using the vocab. """
+         if token in self.special_tokens:
+             return self.special_tokens[token]
+         return self.sp_model.PieceToId(token)
+
+     def convert_id_to_token(self, index):
+         """Converts an index (integer) in a token (str) using the vocab."""
+         if index in self.index_special_tokens:
+             return self.index_special_tokens[index]
+         if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
+             return ""
+         return self.sp_model.IdToPiece(index)
+
+
+ class ChatGLMTokenizer(PreTrainedTokenizer):
+     vocab_files_names = {"vocab_file": "tokenizer.model"}
+
+     model_input_names = ["input_ids", "attention_mask", "position_ids"]
+
+     def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, encode_special_tokens=False,
+                  **kwargs):
+         self.name = "GLMTokenizer"
+
+         self.vocab_file = vocab_file
+         self.tokenizer = SPTokenizer(vocab_file)
+         self.special_tokens = {
+             "<bos>": self.tokenizer.bos_id,
+             "<eos>": self.tokenizer.eos_id,
+             "<pad>": self.tokenizer.pad_id
+         }
+         self.encode_special_tokens = encode_special_tokens
+         super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+                          encode_special_tokens=encode_special_tokens,
+                          **kwargs)
+
+     def get_command(self, token):
+         if token in self.special_tokens:
+             return self.special_tokens[token]
+         assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
+         return self.tokenizer.special_tokens[token]
+
+     @property
+     def unk_token(self) -> str:
+         return "<unk>"
+
+     @property
+     def pad_token(self) -> str:
+         return "<unk>"
+
+     @property
+     def pad_token_id(self):
+         return self.get_command("<pad>")
+
+     @property
+     def eos_token(self) -> str:
+         return "</s>"
+
+     @property
+     def eos_token_id(self):
+         return self.get_command("<eos>")
+
+     @property
+     def vocab_size(self):
+         return self.tokenizer.n_words
+
+     def get_vocab(self):
+         """ Returns vocab as a dict """
+         vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
+         vocab.update(self.added_tokens_encoder)
+         return vocab
+
+     def _tokenize(self, text, **kwargs):
+         return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)
+
+     def _convert_token_to_id(self, token):
+         """ Converts a token (str) in an id using the vocab. """
+         return self.tokenizer.convert_token_to_id(token)
+
+     def _convert_id_to_token(self, index):
+         """Converts an index (integer) in a token (str) using the vocab."""
+         return self.tokenizer.convert_id_to_token(index)
+
+     def convert_tokens_to_string(self, tokens: List[str]) -> str:
+         return self.tokenizer.decode_tokens(tokens)
+
+     def save_vocabulary(self, save_directory, filename_prefix=None):
+         """
+         Save the vocabulary and special tokens file to a directory.
+
+         Args:
+             save_directory (`str`):
+                 The directory in which to save the vocabulary.
+             filename_prefix (`str`, *optional*):
+                 An optional prefix to add to the named of the saved files.
+
+         Returns:
+             `Tuple(str)`: Paths to the files saved.
+         """
+         if os.path.isdir(save_directory):
+             vocab_file = os.path.join(
+                 save_directory, self.vocab_files_names["vocab_file"]
+             )
+         else:
+             vocab_file = save_directory
+
+         with open(self.vocab_file, 'rb') as fin:
+             proto_str = fin.read()
+
+         with open(vocab_file, "wb") as writer:
+             writer.write(proto_str)
+
+         return (vocab_file,)
+
+     def get_prefix_tokens(self):
+         prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
+         return prefix_tokens
+
+     def build_single_message(self, role, metadata, message):
+         assert role in ["system", "user", "assistant", "observation"], role
+         role_tokens = [self.get_command(f"<|{role}|>")] + self.tokenizer.encode(f"{metadata}\n")
+         message_tokens = self.tokenizer.encode(message)
+         tokens = role_tokens + message_tokens
+         return tokens
+
+     def build_chat_input(self, query, history=None, role="user"):
+         if history is None:
+             history = []
+         input_ids = []
+         for item in history:
+             content = item["content"]
+             if item["role"] == "system" and "tools" in item:
+                 content = content + "\n" + json.dumps(item["tools"], indent=4, ensure_ascii=False)
+             input_ids.extend(self.build_single_message(item["role"], item.get("metadata", ""), content))
+         input_ids.extend(self.build_single_message(role, "", query))
+         input_ids.extend([self.get_command("<|assistant|>")])
+         return self.batch_encode_plus([input_ids], return_tensors="pt", is_split_into_words=True)
+
+     def build_inputs_with_special_tokens(
+             self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+     ) -> List[int]:
+         """
+         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
+         adding special tokens. A BERT sequence has the following format:
+
+         - single sequence: `[CLS] X [SEP]`
+         - pair of sequences: `[CLS] A [SEP] B [SEP]`
+
+         Args:
+             token_ids_0 (`List[int]`):
+                 List of IDs to which the special tokens will be added.
+             token_ids_1 (`List[int]`, *optional*):
+                 Optional second list of IDs for sequence pairs.
+
+         Returns:
+             `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
+         """
+         prefix_tokens = self.get_prefix_tokens()
+         token_ids_0 = prefix_tokens + token_ids_0
+         if token_ids_1 is not None:
+             token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
+         return token_ids_0
+
+     def _pad(
+             self,
+             encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
+             max_length: Optional[int] = None,
+             padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
+             pad_to_multiple_of: Optional[int] = None,
+             return_attention_mask: Optional[bool] = None,
+     ) -> dict:
+         """
+         Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
+
+         Args:
+             encoded_inputs:
+                 Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
+             max_length: maximum length of the returned list and optionally padding length (see below).
+                 Will truncate by taking into account the special tokens.
+             padding_strategy: PaddingStrategy to use for padding.
+
+                 - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
+                 - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
+                 - PaddingStrategy.DO_NOT_PAD: Do not pad
+                 The tokenizer padding sides are defined in self.padding_side:
+
+                 - 'left': pads on the left of the sequences
+                 - 'right': pads on the right of the sequences
+             pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
+                 This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
+                 `>= 7.5` (Volta).
+             return_attention_mask:
+                 (optional) Set to False to avoid returning attention mask (default: set to model specifics)
+         """
+         # Load from model defaults
+         assert self.padding_side == "left"
+
+         required_input = encoded_inputs[self.model_input_names[0]]
+         seq_length = len(required_input)
+
+         if padding_strategy == PaddingStrategy.LONGEST:
+             max_length = len(required_input)
+
+         if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
+             max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
+
+         needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
+
+         # Initialize attention mask if not present.
+         if "attention_mask" not in encoded_inputs:
+             encoded_inputs["attention_mask"] = [1] * seq_length
+
+         if "position_ids" not in encoded_inputs:
+             encoded_inputs["position_ids"] = list(range(seq_length))
+
+         if needs_to_be_padded:
+             difference = max_length - len(required_input)
+
+             if "attention_mask" in encoded_inputs:
+                 encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
+             if "position_ids" in encoded_inputs:
+                 encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
+             encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
+
+         return encoded_inputs
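
This is the custom tokenizer class that `tokenizer_config.json` points to via `auto_map`. A hedged usage sketch; the repo id is assumed and the tokenizer could equally be loaded from a local clone of this upload.

```python
# Hypothetical usage of the ChatGLMTokenizer shipped with this upload.
# AutoTokenizer resolves the class through the auto_map entry in tokenizer_config.json;
# "AICube/test1" is an assumed repo id.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("AICube/test1", trust_remote_code=True)
ids = tok.encode("Hello, world!")
print(ids)
print(tok.decode(ids))
```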
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
+ size 1018370
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "added_tokens_decoder": {},
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_chatglm.ChatGLMTokenizer",
+ null
+ ]
+ },
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message + '\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\\nAssistant: ' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "do_lower_case": false,
+ "encode_special_tokens": false,
+ "eos_token": "</s>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<unk>",
+ "padding_side": "right",
+ "remove_space": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "ChatGLMTokenizer",
+ "unk_token": "<unk>"
+ }
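
The `chat_template` above is LLaMA-Factory's `default` "Human/Assistant" template rather than ChatGLM3's native role tokens. A small sketch rendering it with plain Jinja2 to show the prompt format the adapter was trained on; the local file path and the example messages are assumptions.

```python
# Render the chat_template from tokenizer_config.json to see the resulting prompt
# ("Human: ... Assistant: ..."). Assumes tokenizer_config.json is in the current directory.
import json
from jinja2 import Template

with open("tokenizer_config.json") as f:
    template_str = json.load(f)["chat_template"]

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize chapter one."},
]
print(Template(template_str).render(messages=messages))
```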
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 0.9919137466307277,
+ "total_flos": 2.703782589549773e+16,
+ "train_loss": 5.550413712211277,
+ "train_runtime": 20332.1955,
+ "train_samples_per_second": 0.036,
+ "train_steps_per_second": 0.002
+ }
trainer_config.yaml ADDED
@@ -0,0 +1,27 @@
+ cutoff_len: 1024
+ dataset: im_the_fated_villain_chapters
+ dataset_dir: data
+ do_train: true
+ finetuning_type: lora
+ flash_attn: auto
+ gradient_accumulation_steps: 8
+ learning_rate: 2.0e-05
+ logging_steps: 5
+ lora_alpha: 16
+ lora_dropout: 0
+ lora_rank: 8
+ lora_target: query_key_value
+ lr_scheduler_type: cosine
+ max_grad_norm: 1.0
+ max_samples: 100000
+ model_name_or_path: THUDM/chatglm3-6b-base
+ num_train_epochs: 1.0
+ optim: adamw_torch
+ output_dir: saves/ChatGLM3-6B-Base/lora/test1
+ packing: false
+ per_device_train_batch_size: 2
+ report_to: none
+ save_steps: 100
+ stage: sft
+ template: default
+ warmup_steps: 0
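
From this config, the batch-size and step counts in the training log can be reproduced: an effective batch size of 16 and 46 optimization steps over the 741 examples reported by the Trainer. A quick illustrative check (the 741 example count comes from running_log.txt, and the floor division mirrors how the Trainer derives update steps per epoch):

```python
# Cross-check trainer_config.yaml against the figures in running_log.txt.
import math

num_examples = 741   # "Num examples = 741" in running_log.txt
per_device_bs = 2    # per_device_train_batch_size
grad_accum = 8       # gradient_accumulation_steps
num_epochs = 1.0     # num_train_epochs

effective_bs = per_device_bs * grad_accum
batches_per_epoch = math.ceil(num_examples / per_device_bs)
steps = (batches_per_epoch // grad_accum) * int(num_epochs)

print(effective_bs)  # 16, matching "Total train batch size"
print(steps)         # 46, matching "Total optimization steps"
```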
trainer_log.jsonl ADDED
@@ -0,0 +1,10 @@
+ {"current_steps": 5, "total_steps": 46, "loss": 1.8053, "learning_rate": 1.9422609221188208e-05, "epoch": 0.1078167115902965, "percentage": 10.87, "elapsed_time": "0:34:51", "remaining_time": "4:45:50"}
+ {"current_steps": 10, "total_steps": 46, "loss": 1.7973, "learning_rate": 1.77571129070442e-05, "epoch": 0.215633423180593, "percentage": 21.74, "elapsed_time": "1:13:49", "remaining_time": "4:25:46"}
+ {"current_steps": 15, "total_steps": 46, "loss": 1.7813, "learning_rate": 1.5195839500354338e-05, "epoch": 0.32345013477088946, "percentage": 32.61, "elapsed_time": "1:54:09", "remaining_time": "3:55:55"}
+ {"current_steps": 20, "total_steps": 46, "loss": 1.8348, "learning_rate": 1.2034560130526341e-05, "epoch": 0.431266846361186, "percentage": 43.48, "elapsed_time": "2:34:12", "remaining_time": "3:20:27"}
+ {"current_steps": 25, "total_steps": 46, "loss": 1.7943, "learning_rate": 8.638333509037535e-06, "epoch": 0.5390835579514824, "percentage": 54.35, "elapsed_time": "3:13:52", "remaining_time": "2:42:51"}
+ {"current_steps": 30, "total_steps": 46, "loss": 42.0508, "learning_rate": 5.399349622688479e-06, "epoch": 0.6469002695417789, "percentage": 65.22, "elapsed_time": "3:51:26", "remaining_time": "2:03:26"}
+ {"current_steps": 35, "total_steps": 46, "loss": 0.0, "learning_rate": 2.691640357218759e-06, "epoch": 0.7547169811320755, "percentage": 76.09, "elapsed_time": "4:25:52", "remaining_time": "1:23:33"}
+ {"current_steps": 40, "total_steps": 46, "loss": 0.0, "learning_rate": 8.278869849454718e-07, "epoch": 0.862533692722372, "percentage": 86.96, "elapsed_time": "4:57:53", "remaining_time": "0:44:41"}
+ {"current_steps": 45, "total_steps": 46, "loss": 0.0, "learning_rate": 2.3312308094607382e-08, "epoch": 0.9703504043126685, "percentage": 97.83, "elapsed_time": "5:31:58", "remaining_time": "0:07:22"}
+ {"current_steps": 46, "total_steps": 46, "epoch": 0.9919137466307277, "percentage": 100.0, "elapsed_time": "5:38:52", "remaining_time": "0:00:00"}
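
The log shows the run diverging around step 30: the loss jumps to 42.05, after which trainer_state.json records a NaN gradient norm and the reported loss collapses to 0.0. A short sketch for reading this file and spotting such jumps (the local file path is an assumption):

```python
# Scan trainer_log.jsonl and print the loss curve, flagging suspicious jumps
# such as the spike at step 30. Assumes the file is in the current directory.
import json

prev = None
with open("trainer_log.jsonl") as f:
    for line in f:
        record = json.loads(line)
        loss = record.get("loss")
        if loss is None:
            continue  # the final summary record has no loss field
        flag = " <-- suspicious" if prev is not None and abs(loss - prev) > 5 else ""
        print(f"step {record['current_steps']:>3}: loss {loss:.4f}{flag}")
        prev = loss
```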
trainer_state.json ADDED
@@ -0,0 +1,93 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9919137466307277,
+ "eval_steps": 500,
+ "global_step": 46,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.1078167115902965,
+ "grad_norm": 0.25050440430641174,
+ "learning_rate": 1.9422609221188208e-05,
+ "loss": 1.8053,
+ "step": 5
+ },
+ {
+ "epoch": 0.215633423180593,
+ "grad_norm": 0.2306346893310547,
+ "learning_rate": 1.77571129070442e-05,
+ "loss": 1.7973,
+ "step": 10
+ },
+ {
+ "epoch": 0.32345013477088946,
+ "grad_norm": 0.2684953510761261,
+ "learning_rate": 1.5195839500354338e-05,
+ "loss": 1.7813,
+ "step": 15
+ },
+ {
+ "epoch": 0.431266846361186,
+ "grad_norm": 0.2725406587123871,
+ "learning_rate": 1.2034560130526341e-05,
+ "loss": 1.8348,
+ "step": 20
+ },
+ {
+ "epoch": 0.5390835579514824,
+ "grad_norm": 0.30252954363822937,
+ "learning_rate": 8.638333509037535e-06,
+ "loss": 1.7943,
+ "step": 25
+ },
+ {
+ "epoch": 0.6469002695417789,
+ "grad_norm": NaN,
+ "learning_rate": 5.399349622688479e-06,
+ "loss": 42.0508,
+ "step": 30
+ },
+ {
+ "epoch": 0.7547169811320755,
+ "grad_norm": NaN,
+ "learning_rate": 2.691640357218759e-06,
+ "loss": 0.0,
+ "step": 35
+ },
+ {
+ "epoch": 0.862533692722372,
+ "grad_norm": NaN,
+ "learning_rate": 8.278869849454718e-07,
+ "loss": 0.0,
+ "step": 40
+ },
+ {
+ "epoch": 0.9703504043126685,
+ "grad_norm": NaN,
+ "learning_rate": 2.3312308094607382e-08,
+ "loss": 0.0,
+ "step": 45
+ },
+ {
+ "epoch": 0.9919137466307277,
+ "step": 46,
+ "total_flos": 2.703782589549773e+16,
+ "train_loss": 5.550413712211277,
+ "train_runtime": 20332.1955,
+ "train_samples_per_second": 0.036,
+ "train_steps_per_second": 0.002
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 46,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 100,
+ "total_flos": 2.703782589549773e+16,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da25c55235aa6432fae9f2038b23422a509e51cccc55e4a6c1403532d635b699
+ size 5112
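
training_args.bin is the pickled training-arguments object the Trainer saves alongside the model. If you want to inspect the exact arguments of this run, a hedged sketch (unpickling reconstructs the class, so transformers must be importable; the local path is assumed):

```python
# Inspect the serialized training arguments. Assumes "training_args.bin" has been
# downloaded to the current directory and that transformers is installed.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args.learning_rate, args.per_device_train_batch_size, args.gradient_accumulation_steps)
```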