Spaces:
Sleeping
Sleeping
| import pickle | |
| from config import ( | |
| MODEL_NAME, MODEL_TYPE, DEVICE_TYPE, | |
| SENTENCE_EMBEDDING_FILE, | |
| STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR | |
| ) | |
| from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper | |
| from data_lib.subject_data import SubjectData | |
| from data_lib.standard_name_map_data import StandardNameMapData | |
| import os | |
class SentenceTransformerService:
    """Container for the sentence-transformer model and its reference data.

    Every attribute starts as ``None`` and is populated by
    :meth:`load_model_data`.

    Attributes:
        sentenceTransformerHelper: ``SentenceTransformerHelper`` built from
            ``MODEL_NAME`` / ``MODEL_TYPE``. Also serves as the
            "already loaded" marker checked by :meth:`load_model_data`.
        dic_standard_subject: Result of
            ``SubjectData.create_standard_subject_dic_from_file``.
        anchor_name_sentence_embeddings: Embeddings of the anchor-name
            sentences, read from or written to ``SENTENCE_EMBEDDING_FILE``.
        standardNameMapData: ``StandardNameMapData`` loaded from
            ``STANDARD_NAME_MAP_DATA_FILE`` with the embeddings attached.
        sampleData: Not assigned anywhere in this class beyond ``None``.
    """

    def __init__(self):
        self.sentenceTransformerHelper = None
        self.dic_standard_subject = None
        self.anchor_name_sentence_embeddings = None
        self.sampleData = None
        # Declared here (it used to appear only inside load_model_data) so
        # that reading it before loading yields None, like the other
        # attributes, instead of raising AttributeError.
        self.standardNameMapData = None

    def load_model_data(self):
        """Load model and data only once at startup.

        Returns immediately when ``sentenceTransformerHelper`` is already
        set. Otherwise builds the model helper, the standard-subject
        dictionary, the standard-name map and the anchor-name embeddings,
        and only then stores them on the instance. If any step raises, no
        attribute is modified, so a later call retries the whole load
        instead of reporting "already loaded" on a half-initialised service.

        The embeddings are read from ``SENTENCE_EMBEDDING_FILE`` when that
        file exists; otherwise they are computed and written to it.
        """
        if self.sentenceTransformerHelper is not None:
            print("Model already loaded. Skipping reload.")
            return  # Do not reload when a model is already present.

        print("Loading models and data...")

        # Everything below is built into locals first; see the commit step
        # at the end of this method.

        # Load sentence transformer model
        helper = SentenceTransformerHelper(
            model_name=MODEL_NAME, model_type=MODEL_TYPE
        )
        print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE}")

        # Load standard subject dictionary
        standard_subjects = SubjectData.create_standard_subject_dic_from_file(
            SUBJECT_DATA_FILE
        )

        # Initialize StandardNameMapData without embeddings first
        name_map = StandardNameMapData(None)
        name_map.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
        name_map.process_data()

        # Load or create embeddings
        if os.path.exists(SENTENCE_EMBEDDING_FILE):
            # NOTE(security): pickle.load can execute arbitrary code embedded
            # in the file. This is only acceptable while the cache file is
            # produced by this service and not writable by untrusted parties.
            with open(SENTENCE_EMBEDDING_FILE, "rb") as f:
                embeddings = pickle.load(f)
            print(
                f"Loaded anchor name sentence embeddings shape: {embeddings.shape}"
            )
        else:
            anchor_sentences = name_map.processed_data["anchor_name_sentences"]
            embeddings = helper.create_embeddings(anchor_sentences)

            # Write to a sibling temp file and rename it into place so that
            # an interrupted write cannot leave a truncated cache file that
            # the next start-up would try to unpickle.
            tmp_path = f"{SENTENCE_EMBEDDING_FILE}.tmp"
            with open(tmp_path, "wb") as f:
                pickle.dump(embeddings, f)
            os.replace(tmp_path, SENTENCE_EMBEDDING_FILE)
            print(
                f"Saved anchor name sentence embeddings to {SENTENCE_EMBEDDING_FILE}"
            )

        # Update embeddings in StandardNameMapData
        name_map.update_embeddings(embeddings)

        # Commit: publish the fully built state. The helper is assigned last
        # because it is the marker the guard at the top checks.
        self.dic_standard_subject = standard_subjects
        self.standardNameMapData = name_map
        self.anchor_name_sentence_embeddings = embeddings
        self.sentenceTransformerHelper = helper

        print("Models and data loaded successfully")
# Global instance (singleton): a module-level service object created at import
# time. Construction only sets the attributes to None; no model or data is
# loaded until load_model_data() is called on this instance.
sentence_transformer_service = SentenceTransformerService()