vumichien committed on
Commit
28bdc3c
·
1 Parent(s): 9fceb3c

update int8 quantization config

Browse files
config.py CHANGED
@@ -22,3 +22,4 @@ SENTENCE_EMBEDDING_FILE = os.path.join(
22
  # SENTENCE_EMBEDDING_FILE = None
23
  MODEL_TYPE = "openvino"
24
  DEVICE_TYPE = "cpu"
 
 
22
  # SENTENCE_EMBEDDING_FILE = None
23
  MODEL_TYPE = "openvino"
24
  DEVICE_TYPE = "cpu"
25
+ QINT8 = False
prepare.py CHANGED
@@ -1,5 +1,38 @@
1
- from sentence_transformers import SentenceTransformer
2
- from config import MODEL_NAME
 
 
 
3
 
4
- model = SentenceTransformer(MODEL_NAME, backend="openvino")
5
- model.push_to_hub(MODEL_NAME, create_pr=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import (
2
+ SentenceTransformer,
3
+ export_static_quantized_openvino_model,
4
+ export_dynamic_quantized_onnx_model,
5
+ )
6
 
7
+ MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"
8
+
9
+
10
+ def export_model(backend="onnx", use_qint8=False):
11
+ if backend == "openvino":
12
+ model = SentenceTransformer(MODEL_NAME, backend="openvino")
13
+ if use_qint8:
14
+ export_static_quantized_openvino_model(
15
+ model,
16
+ quantization_config=None,
17
+ model_name_or_path=MODEL_NAME,
18
+ push_to_hub=True,
19
+ )
20
+ else:
21
+ model.push_to_hub(MODEL_NAME)
22
+ elif backend == "onnx":
23
+ model = SentenceTransformer(MODEL_NAME, backend="onnx")
24
+ if use_qint8:
25
+ export_dynamic_quantized_onnx_model(
26
+ model, "avx512_vnni", MODEL_NAME, push_to_hub=True
27
+ )
28
+ else:
29
+ model.push_to_hub(MODEL_NAME)
30
+ else:
31
+ raise ValueError(f"Invalid backend: {backend}")
32
+
33
+
34
+ # Export all combinations
35
+ for backend in ["openvino", "onnx"]:
36
+ for use_qint8 in [True, False]:
37
+ print(f"Exporting {backend} model with QINT8={use_qint8}")
38
+ export_model(backend=backend, use_qint8=use_qint8)
services/sentence_transformer_service.py CHANGED
@@ -2,7 +2,7 @@ import pickle
2
  from config import (
3
  MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
4
  SENTENCE_EMBEDDING_FILE,
5
- STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR
6
  )
7
  from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
8
  from data_lib.subject_data import SubjectData
@@ -25,9 +25,12 @@ class SentenceTransformerService:
25
 
26
  print("Loading models and data...")
27
  # Load sentence transformer model
28
- self.sentenceTransformerHelper = SentenceTransformerHelper(model_name=MODEL_NAME, model_type=MODEL_TYPE)
29
- print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE}")
30
-
 
 
 
31
  # Load standard subject dictionary
32
  self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
33
 
 
2
  from config import (
3
  MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
4
  SENTENCE_EMBEDDING_FILE,
5
+ STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR, QINT8
6
  )
7
  from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
8
  from data_lib.subject_data import SubjectData
 
25
 
26
  print("Loading models and data...")
27
  # Load sentence transformer model
28
+ print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE} and qint8={QINT8}")
29
+ self.sentenceTransformerHelper = SentenceTransformerHelper(
30
+ model_name=MODEL_NAME,
31
+ model_type=MODEL_TYPE,
32
+ qint8=QINT8
33
+ )
34
  # Load standard subject dictionary
35
  self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
36