Spaces:
Running
Running
Update int8 quantization config
Browse files
- config.py +1 -0
- prepare.py +37 -4
- services/sentence_transformer_service.py +7 -4
config.py
CHANGED
@@ -22,3 +22,4 @@ SENTENCE_EMBEDDING_FILE = os.path.join(
|
|
22 |
# SENTENCE_EMBEDDING_FILE = None
|
23 |
MODEL_TYPE = "openvino"
|
24 |
DEVICE_TYPE = "cpu"
|
|
|
|
22 |
# SENTENCE_EMBEDDING_FILE = None
|
23 |
MODEL_TYPE = "openvino"
|
24 |
DEVICE_TYPE = "cpu"
|
25 |
+
# Flag forwarded as the `qint8` argument to SentenceTransformerHelper when the
# service loads the model (see services/sentence_transformer_service.py).
QINT8 = False
|
prepare.py
CHANGED
@@ -1,5 +1,38 @@
|
|
1 |
-
from sentence_transformers import
|
2 |
-
|
|
|
|
|
|
|
3 |
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import (
|
2 |
+
SentenceTransformer,
|
3 |
+
export_static_quantized_openvino_model,
|
4 |
+
export_dynamic_quantized_onnx_model,
|
5 |
+
)
|
6 |
|
7 |
+
# Model identifier used by export_model both as the source passed to
# SentenceTransformer(...) and as the target passed to push_to_hub(...).
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"
|
8 |
+
|
9 |
+
|
10 |
+
def export_model(backend="onnx", use_qint8=False):
    """Load MODEL_NAME with the requested backend and push it to the Hub.

    Args:
        backend: Inference backend to load the model with; either
            "openvino" or "onnx".
        use_qint8: When true, export and push a quantized variant instead of
            the plain model: static quantization for "openvino" (with the
            library's default quantization config), dynamic quantization
            with the "avx512_vnni" configuration for "onnx".

    Raises:
        ValueError: If ``backend`` is neither "openvino" nor "onnx".
    """
    # Reject unknown backends before doing any (slow) model loading.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        # Unquantized export: pushing the loaded model is all that is needed.
        model.push_to_hub(MODEL_NAME)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )
|
32 |
+
|
33 |
+
|
34 |
+
# Export all combinations of backend and quantization setting.
# NOTE: this runs at module top level, so every (backend, use_qint8) pair
# triggers a model load and a push to the Hub as soon as the file is executed.
for backend in ["openvino", "onnx"]:
    for use_qint8 in [True, False]:
        print(f"Exporting {backend} model with QINT8={use_qint8}")
        export_model(backend=backend, use_qint8=use_qint8)
|
services/sentence_transformer_service.py
CHANGED
@@ -2,7 +2,7 @@ import pickle
|
|
2 |
from config import (
|
3 |
MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
|
4 |
SENTENCE_EMBEDDING_FILE,
|
5 |
-
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR
|
6 |
)
|
7 |
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
|
8 |
from data_lib.subject_data import SubjectData
|
@@ -25,9 +25,12 @@ class SentenceTransformerService:
|
|
25 |
|
26 |
print("Loading models and data...")
|
27 |
# Load sentence transformer model
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
31 |
# Load standard subject dictionary
|
32 |
self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
|
33 |
|
|
|
2 |
from config import (
|
3 |
MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
|
4 |
SENTENCE_EMBEDDING_FILE,
|
5 |
+
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR, QINT8
|
6 |
)
|
7 |
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
|
8 |
from data_lib.subject_data import SubjectData
|
|
|
25 |
|
26 |
print("Loading models and data...")
|
27 |
# Load sentence transformer model
|
28 |
+
print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE} and qint8={QINT8}")
|
29 |
+
self.sentenceTransformerHelper = SentenceTransformerHelper(
|
30 |
+
model_name=MODEL_NAME,
|
31 |
+
model_type=MODEL_TYPE,
|
32 |
+
qint8=QINT8
|
33 |
+
)
|
34 |
# Load standard subject dictionary
|
35 |
self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
|
36 |
|