Xenova HF staff committed on
Commit
05e1692
1 Parent(s): 0a16be8

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -139,8 +139,7 @@
139
  50360,
140
  50361
141
  ],
142
- "torch_dtype": "float32",
143
- "transformers_version": "4.29.2",
144
  "use_cache": true,
145
  "use_weighted_layer_sum": false,
146
  "vocab_size": 51864
 
139
  50360,
140
  50361
141
  ],
142
+ "transformers_version": "4.30.2",
 
143
  "use_cache": true,
144
  "use_weighted_layer_sum": false,
145
  "vocab_size": 51864
generation_config.json CHANGED
@@ -110,5 +110,5 @@
110
  50360,
111
  50361
112
  ],
113
- "transformers_version": "4.29.2"
114
  }
 
110
  50360,
111
  50361
112
  ],
113
+ "transformers_version": "4.30.2"
114
  }
onnx/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:779d5e8b901ffefae8d8f6e289081a79501d2af7afae1e86048b6c1218b95f12
3
  size 314706485
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b86d63f0644a5df4c335edddfc8c52feb1377bdffa6927f587506c609e409077
3
  size 314706485
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cba9300ac67addfd76033fc395a6b7449a024bdda955eb3e2a830932fde6ba3b
3
- size 80630991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c3939f7b64dedfea7bff57264c1635bbcf6dace6837bafc6da3167bcdc8211
3
+ size 80171680
onnx/decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e371e69760f78464236aebedef51907ff30d1039f6a7b91b79ea702cf8207a40
3
- size 80281155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc9146e51bebedcb6f2fdbdb3e6f4544aa4d99d49032fe3421c87497d340f95
3
+ size 79821844
onnx/decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8911b5725dc6008a010d08ededd0d64e612138bd71e8fce591f928e0ede9e38
3
- size 77024457
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835b6873332400f93027d88c8a38dd089f9489a34c78954467aa7bb16894e927
3
+ size 76595926
onnx/encoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e5c7694b53a299a30beb6c6b9b4934f814cf3382bec0957b612ee0934b78858
3
- size 23321703
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc3dace4e873e8d62b34fa0524b9d26198eebb06b6282785716cbd53c853026
3
+ size 23183306
quant_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": false,
3
+ "reduce_range": false,
4
+ "per_model_config": {
5
+ "encoder_model": {
6
+ "op_types": [
7
+ "Reshape",
8
+ "Shape",
9
+ "Unsqueeze",
10
+ "Div",
11
+ "Sub",
12
+ "Erf",
13
+ "Gather",
14
+ "Sqrt",
15
+ "Concat",
16
+ "Add",
17
+ "MatMul",
18
+ "Constant",
19
+ "Pow",
20
+ "ReduceMean",
21
+ "Transpose",
22
+ "Mul",
23
+ "Softmax",
24
+ "Conv"
25
+ ],
26
+ "weight_type": "QUInt8"
27
+ },
28
+ "decoder_model": {
29
+ "op_types": [
30
+ "Erf",
31
+ "Gather",
32
+ "Cast",
33
+ "ConstantOfShape",
34
+ "Transpose",
35
+ "Reshape",
36
+ "Sqrt",
37
+ "Softmax",
38
+ "Mul",
39
+ "Concat",
40
+ "Unsqueeze",
41
+ "Where",
42
+ "Less",
43
+ "MatMul",
44
+ "Slice",
45
+ "Shape",
46
+ "Range",
47
+ "Div",
48
+ "Squeeze",
49
+ "Add",
50
+ "Expand",
51
+ "Constant",
52
+ "Pow",
53
+ "Equal",
54
+ "ReduceMean",
55
+ "Sub"
56
+ ],
57
+ "weight_type": "QInt8"
58
+ },
59
+ "decoder_model_merged": {
60
+ "op_types": [
61
+ "If"
62
+ ],
63
+ "weight_type": "QInt8"
64
+ },
65
+ "decoder_with_past_model": {
66
+ "op_types": [
67
+ "Reshape",
68
+ "Shape",
69
+ "Unsqueeze",
70
+ "Div",
71
+ "Erf",
72
+ "Gather",
73
+ "Sqrt",
74
+ "Add",
75
+ "MatMul",
76
+ "Constant",
77
+ "Slice",
78
+ "Pow",
79
+ "ReduceMean",
80
+ "Sub",
81
+ "Mul",
82
+ "Softmax",
83
+ "Transpose",
84
+ "Concat"
85
+ ],
86
+ "weight_type": "QInt8"
87
+ }
88
+ }
89
+ }