vumichien commited on
Commit
892f887
·
1 Parent(s): 607ef0e

change model

Browse files
config.py CHANGED
@@ -19,3 +19,5 @@ SENTENCE_EMBEDDING_FILE = os.path.join(
19
  DATA_DIR,
20
  "anchor_name_sentence_sentence_embeddings(cl-nagoya-sup-simcse-ja-for-standard-name-v0_9_10).pkl",
21
  )
 
 
 
19
  DATA_DIR,
20
  "anchor_name_sentence_sentence_embeddings(cl-nagoya-sup-simcse-ja-for-standard-name-v0_9_10).pkl",
21
  )
22
+ MODEL_TYPE = "openvino"
23
+ DEVICE_TYPE = "cpu"
data/sampleDataInput.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
routes/predict.py CHANGED
@@ -8,6 +8,7 @@ from auth import get_current_user
8
  from services.sentence_transformer_service import SentenceTransformerService, sentence_transformer_service
9
  from data_lib.input_name_data import InputNameData
10
  from data_lib.base_name_data import COL_NAME_SENTENCE
 
11
  from mapping_lib.name_mapper import NameMapper
12
  from config import UPLOAD_DIR, OUTPUT_DIR
13
  from models import (
@@ -48,9 +49,16 @@ async def predict(
48
  # Process input data
49
  start_time = time.time()
50
  try:
51
- inputData = InputNameData(sentence_service.dic_standard_subject)
52
  inputData.load_data_from_csv(input_file_path)
53
- inputData.process_data(sentence_service.sentenceTransformerHelper)
 
 
 
 
 
 
 
54
  except Exception as e:
55
  print(f"Error processing input data: {e}")
56
  raise HTTPException(status_code=500, detail=str(e))
@@ -107,9 +115,13 @@ async def create_embeddings(
107
  Create embeddings for a list of input sentences (requires authentication)
108
  """
109
  try:
 
110
  embeddings = sentence_service.sentenceTransformerHelper.create_embeddings(
111
  request.sentences
112
  )
 
 
 
113
  # Convert numpy array to list for JSON serialization
114
  embeddings_list = embeddings.tolist()
115
  return {"embeddings": embeddings_list}
 
8
  from services.sentence_transformer_service import SentenceTransformerService, sentence_transformer_service
9
  from data_lib.input_name_data import InputNameData
10
  from data_lib.base_name_data import COL_NAME_SENTENCE
11
+ from mapping_lib.subject_mapper import SubjectMapper
12
  from mapping_lib.name_mapper import NameMapper
13
  from config import UPLOAD_DIR, OUTPUT_DIR
14
  from models import (
 
49
  # Process input data
50
  start_time = time.time()
51
  try:
52
+ inputData = InputNameData()
53
  inputData.load_data_from_csv(input_file_path)
54
+ except Exception as e:
55
+ print(f"Error processing input data: {e}")
56
+ raise HTTPException(status_code=500, detail=str(e))
57
+ try:
58
+ subject_mapper = SubjectMapper(sentence_service.sentenceTransformerHelper, sentence_service.dic_standard_subject)
59
+ dic_subject_map = subject_mapper.map_standard_subjects(inputData.dataframe)
60
+ inputData.dic_standard_subject = dic_subject_map
61
+ inputData.process_data()
62
  except Exception as e:
63
  print(f"Error processing input data: {e}")
64
  raise HTTPException(status_code=500, detail=str(e))
 
115
  Create embeddings for a list of input sentences (requires authentication)
116
  """
117
  try:
118
+ start_time = time.time()
119
  embeddings = sentence_service.sentenceTransformerHelper.create_embeddings(
120
  request.sentences
121
  )
122
+ end_time = time.time()
123
+ execution_time = end_time - start_time
124
+ print(f"Execution time: {execution_time} seconds")
125
  # Convert numpy array to list for JSON serialization
126
  embeddings_list = embeddings.tolist()
127
  return {"embeddings": embeddings_list}
services/sentence_transformer_service.py CHANGED
@@ -1,6 +1,22 @@
1
  import pickle
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from config import (
3
- MODEL_NAME,
4
  SENTENCE_EMBEDDING_FILE,
5
  STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE
6
  )
@@ -24,10 +40,8 @@ class SentenceTransformerService:
24
 
25
  print("Loading models and data...")
26
  # Load sentence transformer model
27
- self.sentenceTransformerHelper = SentenceTransformerHelper(
28
- convert_to_zenkaku_flag=True, replace_words=None, keywords=None
29
- )
30
- self.sentenceTransformerHelper.load_model_by_name(MODEL_NAME)
31
 
32
  # Load standard subject dictionary
33
  self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
@@ -37,10 +51,10 @@ class SentenceTransformerService:
37
  self.anchor_name_sentence_embeddings = pickle.load(f)
38
 
39
  # Load and process sample data
40
- self.standardNameMapData = StandardNameMapData()
41
  self.standardNameMapData.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
42
- self.standardNameMapData.process_data(self.anchor_name_sentence_embeddings)
43
-
44
  print("Models and data loaded successfully")
45
 
46
  # Global instance (singleton)
 
1
  import pickle
2
+ import warnings
3
+
4
+ # Filter NumPy array implementation warnings specifically
5
+ warnings.filterwarnings(
6
+ "ignore",
7
+ message=".*array.*implementation doesn't accept a copy keyword.*",
8
+ category=DeprecationWarning
9
+ )
10
+
11
+ # Or alternatively, target the exact warning message:
12
+ warnings.filterwarnings(
13
+ "ignore",
14
+ message=".*NumPy will pass `copy` to the `__array__` special method.*",
15
+ category=DeprecationWarning
16
+ )
17
+
18
  from config import (
19
+ MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
20
  SENTENCE_EMBEDDING_FILE,
21
  STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE
22
  )
 
40
 
41
  print("Loading models and data...")
42
  # Load sentence transformer model
43
+ self.sentenceTransformerHelper = SentenceTransformerHelper(model_name=MODEL_NAME, model_type=MODEL_TYPE)
44
+ print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE}")
 
 
45
 
46
  # Load standard subject dictionary
47
  self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
 
51
  self.anchor_name_sentence_embeddings = pickle.load(f)
52
 
53
  # Load and process sample data
54
+ self.standardNameMapData = StandardNameMapData(self.anchor_name_sentence_embeddings)
55
  self.standardNameMapData.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
56
+ self.standardNameMapData.process_data()
57
+
58
  print("Models and data loaded successfully")
59
 
60
  # Global instance (singleton)