ayan4m1
committed on
Commit
·
afa1d07
1
Parent(s):
eba25b4
add 0.2 revision with new weights
Browse files
- README.md +6 -1
- config.json +3 -3
- generation_config.json +1 -1
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +1 -1
- tokenizer_config.json +1 -2
README.md
CHANGED
@@ -38,6 +38,11 @@ Applied open Sonnet datasets containing ~1.2mn question/answer pairs for fine-tu
|
|
38 |
<|begin_of_text|>{prompt}
|
39 |
```
|
40 |
|
|
|
|
|
|
|
|
|
|
|
41 |
## Credits
|
42 |
|
43 |
-
Thanks to Meta and [mlfoundations-dev](https://huggingface.co/mlfoundations-dev) for providing the data used to create this fine-tuning.
|
|
|
38 |
<|begin_of_text|>{prompt}
|
39 |
```
|
40 |
|
41 |
+
## Release History
|
42 |
+
|
43 |
+
* v0.1 - [2025-02-08] Initial release, trained to 512 steps
|
44 |
+
* v0.2 - [2025-02-10] Restarted training with cleaner dataset, ran to 1024 steps
|
45 |
+
|
46 |
## Credits
|
47 |
|
48 |
+
Thanks to Meta and [mlfoundations-dev](https://huggingface.co/mlfoundations-dev) for providing the data used to create this fine-tuning.
|
config.json
CHANGED
@@ -30,9 +30,9 @@
|
|
30 |
},
|
31 |
"rope_theta": 500000.0,
|
32 |
"tie_word_embeddings": false,
|
33 |
-
"torch_dtype": "
|
34 |
-
"transformers_version": "4.
|
35 |
-
"unsloth_version": "2025.2.
|
36 |
"use_cache": true,
|
37 |
"vocab_size": 128256
|
38 |
}
|
|
|
30 |
},
|
31 |
"rope_theta": 500000.0,
|
32 |
"tie_word_embeddings": false,
|
33 |
+
"torch_dtype": "float16",
|
34 |
+
"transformers_version": "4.46.0",
|
35 |
+
"unsloth_version": "2025.2.4",
|
36 |
"use_cache": true,
|
37 |
"vocab_size": 128256
|
38 |
}
|
generation_config.json
CHANGED
@@ -7,5 +7,5 @@
|
|
7 |
"pad_token_id": 128004,
|
8 |
"temperature": 0.6,
|
9 |
"top_p": 0.9,
|
10 |
-
"transformers_version": "4.
|
11 |
}
|
|
|
7 |
"pad_token_id": 128004,
|
8 |
"temperature": 0.6,
|
9 |
"top_p": 0.9,
|
10 |
+
"transformers_version": "4.46.0"
|
11 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15960273496240854588c39b46d1b703b080d4bbd067c8801f2e12bc121347b7
|
3 |
+
size 4976698592
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:685323acd4d9159f51d7807d5c14001aba04232a5d581e7350effdcaa9a2dd8d
|
3 |
+
size 4999802616
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:381a574d81c9a926da69f4ee63e2945454161600048128b73c3b0e0b1f3681e7
|
3 |
+
size 4915916080
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d4d7b88162c2ff2306f622b8f826737907e1f1d3cc58eacf854bc30e35cf905
|
3 |
size 1168138808
|
tokenizer_config.json
CHANGED
@@ -2050,10 +2050,9 @@
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
-
"chat_template": "{% if 'role' in messages[0] %}{
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
2056 |
-
"extra_special_tokens": {},
|
2057 |
"model_input_names": [
|
2058 |
"input_ids",
|
2059 |
"attention_mask"
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
+
"chat_template": "{% if 'role' in messages[0] %}{% if messages[0]['role'] == 'system' %}{% if messages[1]['role'] == 'user' %}{{ bos_token + '[INST] <<SYS>>\n' + messages[0]['content'] + '\n<</SYS>>\n\n' + messages[1]['content'] + ' [/INST]' }}{% set loop_messages = messages[2:] %}{% else %}{{ bos_token + '[INST] ' + messages[0]['content'] + ' [/INST]' }}{% set loop_messages = messages[1:] %}{% endif %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'].strip() + ' ' + eos_token }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% else %}{% if messages[0]['from'] == 'system' %}{% if messages[1]['from'] == 'human' %}{{ bos_token + '[INST] <<SYS>>\n' + messages[0]['value'] + '\n<</SYS>>\n\n' + messages[1]['value'] + ' [/INST]' }}{% set loop_messages = messages[2:] %}{% else %}{{ bos_token + '[INST] ' + messages[0]['value'] + ' [/INST]' }}{% set loop_messages = messages[1:] %}{% endif %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['from'] == 'human' %}{{ bos_token + '[INST] ' + message['value'].strip() + ' [/INST]' }}{% elif message['from'] == 'gpt' %}{{ ' ' + message['value'].strip() + ' ' + eos_token }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% endif %}",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
|
|
2056 |
"model_input_names": [
|
2057 |
"input_ids",
|
2058 |
"attention_mask"
|