File size: 1,241 Bytes
28bdc3c
 
 
 
 
b224afc
28bdc3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from sentence_transformers import (
    SentenceTransformer,
    export_static_quantized_openvino_model,
    export_dynamic_quantized_onnx_model,
)

MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"


def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the given backend and push it to the Hub.

    Args:
        backend: Either ``"openvino"`` or ``"onnx"``; forwarded to
            ``SentenceTransformer`` as its ``backend`` argument.
        use_qint8: When true, call the backend's quantized export helper
            with ``push_to_hub=True``; when false, call
            ``model.push_to_hub(MODEL_NAME)`` on the loaded model instead.

    Raises:
        ValueError: If ``backend`` is neither ``"openvino"`` nor ``"onnx"``.
            Raised before any model is loaded.
    """
    # Reject unknown backends up front so nothing is loaded for them.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    st_model = SentenceTransformer(MODEL_NAME, backend=backend)

    # Unquantized path is the same for both backends.
    if not use_qint8:
        st_model.push_to_hub(MODEL_NAME)
        return

    if backend == "openvino":
        # Static quantization; no explicit quantization config is supplied.
        export_static_quantized_openvino_model(
            st_model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
    else:
        # Dynamic quantization; "avx512_vnni" is passed as the second
        # positional argument (the quantization configuration).
        export_dynamic_quantized_onnx_model(
            st_model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )


# Run the export for every backend, quantized (QINT8) first, then unquantized.
for backend, use_qint8 in (
    (name, quantized)
    for name in ("openvino", "onnx")
    for quantized in (True, False)
):
    print(f"Exporting {backend} model with QINT8={use_qint8}")
    export_model(backend=backend, use_qint8=use_qint8)