lakpriya committed on
Commit
23c0d1e
·
verified ·
1 Parent(s): 308e7b4

Add new files to repository

Browse files
.gitattributes CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  model.onnx_data filter=lfs diff=lfs merge=lfs -text
37
  model_quantized.onnx.data filter=lfs diff=lfs merge=lfs -text
38
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
36
  model.onnx_data filter=lfs diff=lfs merge=lfs -text
37
  model_quantized.onnx.data filter=lfs diff=lfs merge=lfs -text
38
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
40
+ onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -35,6 +35,6 @@
35
  "rope_theta": 500000.0,
36
  "tie_word_embeddings": true,
37
  "transformers_version": "4.48.3",
38
- "use_cache": false,
39
  "vocab_size": 128256
40
  }
 
35
  "rope_theta": 500000.0,
36
  "tie_word_embeddings": true,
37
  "transformers_version": "4.48.3",
38
+ "use_cache": true,
39
  "vocab_size": 128256
40
  }
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b3e2cd84d4fadea12316eb44ddedb33a73eb61ed81e1c68627c5ee46130816
3
+ size 446973
onnx/model.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e363dc18dee39dba38aa3937caa301ad7d8cffdc0bd274f352fe8a0d034e139
3
+ size 4943257600
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d7fde059b85384da1e2ac809f03bda3eb23309440b45d284d62629a435fa9b9
3
+ size 1598755485
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be8bc5e9029223491ade1e4e0aae1caf8c0614845cce88d9c933eadec2ff81f
3
+ size 455244
onnx/model_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58dc4a3d188a90aef50083b16776c0b6736fa593073b401f5ff0b9f2665db424
3
+ size 2471628800
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5caea3937562c0de1a7da17f3a77b9b9f8a4b89c7e81190003c5219cf79269f6
3
+ size 1236577163
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef6c3226413bf11769e2e5664bde7a75aa222a6b9d461c33b11e3f96b057fbe
3
+ size 1659572029
onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70204565a307324961a235f7d1621260bcb2058c984cfe7379fd61a94f6c6bad
3
+ size 1073290385
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5caea3937562c0de1a7da17f3a77b9b9f8a4b89c7e81190003c5219cf79269f6
3
+ size 1236577163
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa42f4bbb25e1f4256f35279a15db56166653c7935268c5970e123fe0ee57c3
3
+ size 1236577223
quantize_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "modes": [
3
+ "fp16",
4
+ "q8",
5
+ "int8",
6
+ "uint8",
7
+ "q4",
8
+ "q4f16",
9
+ "bnb4"
10
+ ],
11
+ "per_channel": false,
12
+ "reduce_range": false,
13
+ "block_size": null,
14
+ "is_symmetric": true,
15
+ "accuracy_level": null,
16
+ "quant_type": 1,
17
+ "op_block_list": null
18
+ }