danielhanchen committed on
Commit 1352d51 · verified · 1 Parent(s): aa1928f

Add files using upload-large-folder tool

.gitattributes CHANGED
@@ -33,4 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Zhipu AI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
config.json CHANGED
@@ -4,7 +4,11 @@
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
- "eos_token_id": 151336,
+ "eos_token_id": [
+ 151329,
+ 151336,
+ 151338
+ ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 6144,
@@ -15,14 +19,13 @@
  "num_attention_heads": 48,
  "num_hidden_layers": 61,
  "num_key_value_heads": 2,
- "pad_token_id": 151330,
+ "pad_token_id": 151329,
  "partial_rotary_factor": 0.5,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.52.0.dev0",
- "unsloth_fixed": true,
  "use_cache": true,
  "vocab_size": 151552
  }
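The config.json change above replaces the single eos_token_id with a list of three stop ids and repoints pad_token_id from 151330 ([MASK]) to 151329 (<|endoftext|> in the GLM-4 vocabulary, matching the special_tokens_map.json change below). A minimal sketch of how the new fields surface in transformers, assuming a local checkout of this repo (the "." path is a stand-in) and a transformers build new enough for this model type (the config pins 4.52.0.dev0):

from transformers import AutoConfig

# "." stands in for a local clone of this repo (hypothetical path).
config = AutoConfig.from_pretrained(".")

# eos_token_id is now a list, so decoding can stop on any of the three ids.
print(config.eos_token_id)   # expected: [151329, 151336, 151338]

# pad_token_id now points at 151329 instead of 151330.
print(config.pad_token_id)   # expected: 151329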
generation_config.json CHANGED
@@ -5,7 +5,6 @@
  151336,
  151338
  ],
- "max_length": 32768,
- "pad_token_id": 151330,
+ "pad_token_id": 151329,
  "transformers_version": "4.52.0.dev0"
  }
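generation_config.json drops the hard-coded max_length and aligns pad_token_id with the config above. A hedged sketch of what the generation defaults look like after this change (same local-checkout assumption as before); with max_length gone, callers should set max_new_tokens or max_length explicitly rather than rely on the previous 32768 cap:

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained(".")  # "." = local clone (hypothetical)

print(gen_config.pad_token_id)  # expected: 151329
print(gen_config.eos_token_id)  # the stop-token list kept above this hunk
print(gen_config.max_length)    # no longer 32768; reverts to the library default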
special_tokens_map.json CHANGED
@@ -16,14 +16,14 @@
  "<|end_of_video|>"
  ],
  "eos_token": {
- "content": "<|user|>",
+ "content": "<|endoftext|>",
  "lstrip": false,
  "normalized": false,
  "rstrip": false,
  "single_word": false
  },
  "pad_token": {
- "content": "[MASK]",
+ "content": "<|endoftext|>",
  "lstrip": false,
  "normalized": false,
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -129,7 +129,7 @@
  "<|begin_of_video|>",
  "<|end_of_video|>"
  ],
- "bos_token": null,
+ "chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "<|user|>",
@@ -138,11 +138,9 @@
  "input_ids",
  "attention_mask"
  ],
- "model_max_length": 32768,
- "pad_token": "[MASK]",
+ "model_max_length": 128000,
+ "pad_token": "<|endoftext|>",
  "padding_side": "left",
  "remove_space": false,
- "tokenizer_class": "PreTrainedTokenizer",
- "unk_token": null,
- "chat_template": "[gMASK]<sop>\n{%- if tools -%}\n<|system|>\n# \u53ef\u7528\u5de5\u5177\n{% for tool in tools %}\n {%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n\u5728\u8c03\u7528\u4e0a\u8ff0\u51fd\u6570\u65f6\uff0c\u8bf7\u4f7f\u7528 Json \u683c\u5f0f\u8868\u793a\u8c03\u7528\u7684\u53c2\u6570\u3002\n{%- endfor %}\n{%- endif -%}\n\n{%- for msg in messages %}\n {%- if msg.role == 'system' %}\n<|system|>\n{{ msg.content }}\n {%- endif %}\n{%- endfor %}\n\n{%- for message in messages if message.role != 'system' %}\n {%- set role = message['role'] %}\n {%- set content = message['content'] %}\n {%- set meta = message.get(\"metadata\", \"\") %}\n\n {%- if role == 'user' %}\n<|user|>\n{{ content }}\n {%- elif role == 'assistant' and not meta %}\n<|assistant|>\n{{ content }}\n {%- elif role == 'assistant' and meta %}\n<|assistant|>{{ meta }}\n{{ content }}\n {%- elif role == 'observation' %}\n<|observation|>\n{{ content }}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}<|assistant|>{% endif %}"
- }
+ "tokenizer_class": "PreTrainedTokenizer"
+ }
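tokenizer_config.json now carries the chat template near the top of the file in a compacted form (Jinja whitespace-control markers instead of literal newlines between tags), raises model_max_length from 32768 to 128000, remaps pad_token to <|endoftext|>, and drops the bos_token and unk_token entries. A sketch of rendering a prompt with the updated template; the expected string is read off the template itself, not verified against the repo:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # "." = local clone (hypothetical)

messages = [{"role": "user", "content": "Hello"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(prompt)
# Expected, reading the template: "[gMASK]<sop><|user|>\nHello<|assistant|>"

print(tok.model_max_length)  # expected: 128000 (was 32768)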