Zakery Clarke committed on
Commit
cd7d4a7
·
1 Parent(s): c133a0b

flan-base-retrain

Browse files
README.md DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- library_name: transformers.js
3
- ---
4
-
5
- https://huggingface.co/MBZUAI/LaMini-Flan-T5-783M with ONNX weights to be compatible with Transformers.js.
6
-
7
- Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "_name_or_path": "MBZUAI/LaMini-Flan-T5-783M",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 2816,
8
  "d_kv": 64,
9
- "d_model": 1024,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
@@ -18,15 +18,44 @@
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
- "num_decoder_layers": 24,
22
- "num_heads": 16,
23
- "num_layers": 24,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
27
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "tie_word_embeddings": false,
29
- "transformers_version": "4.34.0.dev0",
30
  "use_cache": true,
31
  "vocab_size": 32128
32
  }
 
1
  {
2
+ "_name_or_path": "google/flan-t5-base",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
  "d_kv": 64,
9
+ "d_model": 768,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
 
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
27
  "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
  "tie_word_embeddings": false,
58
+ "transformers_version": "4.33.2",
59
  "use_cache": true,
60
  "vocab_size": 32128
61
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.34.0.dev0"
7
  }
 
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.33.2"
7
  }
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2dc690641be4851a21e4040c1d8e74c7d51486761730947de96df254bbd1c0
3
+ size 650848961
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0f2970b86081611577dfba0fbd41a4237546c1bd9104b4aeefb1497d0a6626
3
+ size 651182887
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b47d2f266e175d42559a29c6330483769baca5b82042814e981402b49d4efcb
3
+ size 594197309
onnx/encoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c438df6ed8f9479f3b8ce43d4903cb4817d7c1e0d5c47b86a7a2ca9de621c9
3
+ size 438697388
quantize_config.json DELETED
@@ -1,139 +0,0 @@
1
- {
2
- "per_channel": true,
3
- "reduce_range": true,
4
- "per_model_config": {
5
- "decoder_model": {
6
- "op_types": [
7
- "Add",
8
- "Less",
9
- "ConstantOfShape",
10
- "Mul",
11
- "Pow",
12
- "Gather",
13
- "Shape",
14
- "ReduceMean",
15
- "MatMul",
16
- "Range",
17
- "Transpose",
18
- "Neg",
19
- "Expand",
20
- "Sqrt",
21
- "Min",
22
- "Unsqueeze",
23
- "Where",
24
- "Div",
25
- "Tile",
26
- "Log",
27
- "Concat",
28
- "Cast",
29
- "LessOrEqual",
30
- "Reshape",
31
- "Sub",
32
- "Softmax",
33
- "Tanh",
34
- "Constant"
35
- ],
36
- "weight_type": "QInt8"
37
- },
38
- "encoder_model": {
39
- "op_types": [
40
- "Add",
41
- "Abs",
42
- "Less",
43
- "ConstantOfShape",
44
- "Mul",
45
- "Pow",
46
- "Gather",
47
- "Shape",
48
- "ReduceMean",
49
- "MatMul",
50
- "Range",
51
- "Transpose",
52
- "Sqrt",
53
- "Min",
54
- "Unsqueeze",
55
- "Where",
56
- "Div",
57
- "Greater",
58
- "Log",
59
- "Concat",
60
- "Cast",
61
- "Reshape",
62
- "Sub",
63
- "Softmax",
64
- "Tanh",
65
- "Constant"
66
- ],
67
- "weight_type": "QInt8"
68
- },
69
- "decoder_model_merged": {
70
- "op_types": [
71
- "If",
72
- "Add",
73
- "Less",
74
- "ConstantOfShape",
75
- "Mul",
76
- "Slice",
77
- "Pow",
78
- "Gather",
79
- "Shape",
80
- "ReduceMean",
81
- "MatMul",
82
- "Range",
83
- "Transpose",
84
- "Neg",
85
- "Expand",
86
- "Sqrt",
87
- "Min",
88
- "Unsqueeze",
89
- "Where",
90
- "Div",
91
- "Tile",
92
- "Log",
93
- "Concat",
94
- "Cast",
95
- "LessOrEqual",
96
- "Reshape",
97
- "Sub",
98
- "Softmax",
99
- "Tanh",
100
- "Constant"
101
- ],
102
- "weight_type": "QInt8"
103
- },
104
- "decoder_with_past_model": {
105
- "op_types": [
106
- "Add",
107
- "Less",
108
- "ConstantOfShape",
109
- "Mul",
110
- "Slice",
111
- "Pow",
112
- "Gather",
113
- "Shape",
114
- "ReduceMean",
115
- "MatMul",
116
- "Range",
117
- "Transpose",
118
- "Neg",
119
- "Expand",
120
- "Sqrt",
121
- "Min",
122
- "Unsqueeze",
123
- "Where",
124
- "Div",
125
- "Tile",
126
- "Log",
127
- "Concat",
128
- "Cast",
129
- "LessOrEqual",
130
- "Reshape",
131
- "Sub",
132
- "Softmax",
133
- "Tanh",
134
- "Constant"
135
- ],
136
- "weight_type": "QInt8"
137
- }
138
- }
139
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656