Add/update the quantized ONNX model files and README.md for Transformers.js v3 (#1)
Browse files- Add/update the quantized ONNX model files and README.md for Transformers.js v3 (b73940d05c3dbd2307a3e19bff424663ed0417f3)
Co-authored-by: Yuichiro Tachibana <[email protected]>
- README.md +4 -5
- onnx/model_bnb4.onnx +3 -0
- onnx/model_int8.onnx +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_uint8.onnx +3 -0
README.md
CHANGED
@@ -7,14 +7,14 @@ https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2-v2 with ONNX weights
|
|
7 |
|
8 |
## Usage (Transformers.js)
|
9 |
|
10 |
-
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
|
11 |
```bash
|
12 |
-
npm i @xenova/transformers
|
13 |
```
|
14 |
|
15 |
**Example:** Information Retrieval w/ `Xenova/ms-marco-TinyBERT-L-2-v2`.
|
16 |
```js
|
17 |
-
import { AutoTokenizer, AutoModelForSequenceClassification } from '@xenova/transformers';
|
18 |
|
19 |
const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
|
20 |
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
|
@@ -31,7 +31,7 @@ const features = tokenizer(
|
|
31 |
}
|
32 |
)
|
33 |
|
34 |
-
const scores = await model(features)
|
35 |
console.log(scores);
|
36 |
// quantized: [ 7.210887908935547, -11.559350967407227 ]
|
37 |
// unquantized: [ 7.235750675201416, -11.562294006347656 ]
|
@@ -39,5 +39,4 @@ console.log(scores);
|
|
39 |
|
40 |
---
|
41 |
|
42 |
-
|
43 |
Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
|
|
|
7 |
|
8 |
## Usage (Transformers.js)
|
9 |
|
10 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
|
11 |
```bash
|
12 |
+
npm i @huggingface/transformers
|
13 |
```
|
14 |
|
15 |
**Example:** Information Retrieval w/ `Xenova/ms-marco-TinyBERT-L-2-v2`.
|
16 |
```js
|
17 |
+
import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';
|
18 |
|
19 |
const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
|
20 |
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
|
|
|
31 |
}
|
32 |
)
|
33 |
|
34 |
+
const scores = await model(features);
|
35 |
console.log(scores);
|
36 |
// quantized: [ 7.210887908935547, -11.559350967407227 ]
|
37 |
// unquantized: [ 7.235750675201416, -11.562294006347656 ]
|
|
|
39 |
|
40 |
---
|
41 |
|
|
|
42 |
Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
|
onnx/model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2a5570406cb76fde0137ed4b3976375e2724e9c8115630644ef8e6530e04df9
|
3 |
+
size 16254069
|
onnx/model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f24d6dcf08df3d26b8fba3886942575b64856deba7ac2aa0962c2fb2ccd6d895
|
3 |
+
size 4475667
|
onnx/model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eefe2de2bae1f47563d1fea3e9688f0fb2ff453430b2591e4eb76b95eafc30fa
|
3 |
+
size 16278557
|
onnx/model_q4f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebc16aa0fbdb620cf4723bdf4cea73a2734930c18c4a40fdda351a860ff78dcc
|
3 |
+
size 8268506
|
onnx/model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6547876c4b12cdc52082dc3db0ae896e0332d81ce5afa79baf5a063db2382a31
|
3 |
+
size 4475674
|