Spaces:
Running
Running
Update int8 quantization config
Browse files- config.py +1 -0
- prepare.py +37 -4
- services/sentence_transformer_service.py +7 -4
config.py
CHANGED
|
@@ -22,3 +22,4 @@ SENTENCE_EMBEDDING_FILE = os.path.join(
|
|
| 22 |
# SENTENCE_EMBEDDING_FILE = None
|
| 23 |
MODEL_TYPE = "openvino"
|
| 24 |
DEVICE_TYPE = "cpu"
|
|
|
|
|
|
| 22 |
# SENTENCE_EMBEDDING_FILE = None
|
| 23 |
MODEL_TYPE = "openvino"
|
| 24 |
DEVICE_TYPE = "cpu"
|
| 25 |
+
QINT8 = False
|
prepare.py
CHANGED
|
@@ -1,5 +1,38 @@
|
|
| 1 |
-
from sentence_transformers import
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import (
|
| 2 |
+
SentenceTransformer,
|
| 3 |
+
export_static_quantized_openvino_model,
|
| 4 |
+
export_dynamic_quantized_onnx_model,
|
| 5 |
+
)
|
| 6 |
|
| 7 |
+
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def export_model(backend="onnx", use_qint8=False, onnx_quantization_config="avx512_vnni"):
    """Load ``MODEL_NAME`` with the requested backend and push it to the Hub.

    Args:
        backend: Inference backend to load the model with; either
            ``"openvino"`` or ``"onnx"``.
        use_qint8: When true, export an int8-quantized variant (static
            quantization for OpenVINO, dynamic quantization for ONNX) and
            push that; otherwise push the unquantized model.
        onnx_quantization_config: Quantization preset forwarded to
            ``export_dynamic_quantized_onnx_model``. Only used for the ONNX
            backend with ``use_qint8=True``. The default keeps the value
            that was previously hard-coded.

    Raises:
        ValueError: If ``backend`` is not one of the supported backends.
    """
    # Reject unknown backends before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        # Unquantized export: push the loaded model back under MODEL_NAME.
        model.push_to_hub(MODEL_NAME)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            # NOTE(review): None presumably selects the library's default
            # quantization settings -- confirm against the library docs.
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model, onnx_quantization_config, MODEL_NAME, push_to_hub=True
        )
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# Export every backend / quantization pairing, in the order:
# openvino+qint8, openvino, onnx+qint8, onnx.
export_matrix = [
    (backend, use_qint8)
    for backend in ("openvino", "onnx")
    for use_qint8 in (True, False)
]
for backend, use_qint8 in export_matrix:
    print(f"Exporting {backend} model with QINT8={use_qint8}")
    export_model(backend=backend, use_qint8=use_qint8)
|
services/sentence_transformer_service.py
CHANGED
|
@@ -2,7 +2,7 @@ import pickle
|
|
| 2 |
from config import (
|
| 3 |
MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
|
| 4 |
SENTENCE_EMBEDDING_FILE,
|
| 5 |
-
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR
|
| 6 |
)
|
| 7 |
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
|
| 8 |
from data_lib.subject_data import SubjectData
|
|
@@ -25,9 +25,12 @@ class SentenceTransformerService:
|
|
| 25 |
|
| 26 |
print("Loading models and data...")
|
| 27 |
# Load sentence transformer model
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
# Load standard subject dictionary
|
| 32 |
self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
|
| 33 |
|
|
|
|
| 2 |
from config import (
|
| 3 |
MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
|
| 4 |
SENTENCE_EMBEDDING_FILE,
|
| 5 |
+
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR, QINT8
|
| 6 |
)
|
| 7 |
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
|
| 8 |
from data_lib.subject_data import SubjectData
|
|
|
|
| 25 |
|
| 26 |
print("Loading models and data...")
|
| 27 |
# Load sentence transformer model
|
| 28 |
+
print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE} and qint8={QINT8}")
|
| 29 |
+
self.sentenceTransformerHelper = SentenceTransformerHelper(
|
| 30 |
+
model_name=MODEL_NAME,
|
| 31 |
+
model_type=MODEL_TYPE,
|
| 32 |
+
qint8=QINT8
|
| 33 |
+
)
|
| 34 |
# Load standard subject dictionary
|
| 35 |
self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
|
| 36 |
|