Spaces:
Sleeping
Sleeping
Update config.py
Browse files
config.py
CHANGED
|
@@ -1,36 +1,36 @@
|
|
| 1 |
-
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
class ConfigConstants:
    """Constants related to datasets and models.

    A plain namespace of class-level attributes; nothing here is computed at
    runtime.
    """

    # Directory that AppConfig.detect_loaded_datasets scans for "*_test.pkl" files.
    # NOTE(review): in this (pre-change) revision the literal was captured
    # truncated as "'/" -- an unterminated string. It is restored here to the
    # value used by the updated revision of this file; confirm the old value.
    DATA_SET_PATH = '/home/user/'

    # Dataset identifiers known to the application.
    DATA_SET_NAMES = [
        'covidqa', 'cuad', 'techqa', 'delucionqa', 'emanual', 'expertqa',
        'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa',
    ]

    # Model identifiers; how they are resolved is decided by the callers.
    EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"
    RE_RANKER_MODEL_NAME = 'cross-encoder/ms-marco-electra-base'

    # Default generation / validation models; each also appears in the
    # corresponding list of selectable models below.
    GENERATION_MODEL_NAME = 'mixtral-8x7b-32768'
    VALIDATION_MODEL_NAME = 'llama3-70b-8192'
    GENERATION_MODELS = ["llama3-8b-8192", "qwen-2.5-32b", "mixtral-8x7b-32768", "gemma2-9b-it"]
    VALIDATION_MODELS = ["llama3-70b-8192", "deepseek-r1-distill-llama-70b"]

    # Text chunking parameters.
    # NOTE(review): units (characters vs. tokens) are not visible here -- confirm
    # against the code that performs the splitting.
    DEFAULT_CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200
|
| 16 |
-
|
| 17 |
-
class AppConfig:
    """Container for the application's shared runtime objects.

    Keeps the three objects handed to the constructor and the set of dataset
    names discovered on disk when the instance is created.
    """

    def __init__(self, vector_store, gen_llm, val_llm):
        """Store the supplied objects and detect locally available datasets.

        Args:
            vector_store: Stored unchanged on ``self.vector_store``.
            gen_llm: Stored unchanged on ``self.gen_llm``.
            val_llm: Stored unchanged on ``self.val_llm``.
        """
        self.vector_store = vector_store
        self.gen_llm = gen_llm
        self.val_llm = val_llm
        self.loaded_datasets = self.detect_loaded_datasets()  # Auto-detect loaded datasets

    @staticmethod
    def detect_loaded_datasets(local_path=None):
        """Check which datasets are already stored locally.

        A dataset counts as loaded when the directory contains a file named
        ``<dataset>_test.pkl``. Only file names are inspected; the files are
        never opened.

        Args:
            local_path: Directory to scan. Defaults to
                ``ConfigConstants.DATA_SET_PATH`` when omitted.

        Returns:
            set[str]: The ``<dataset>`` prefixes found. Empty when
            ``local_path`` is missing or is not a directory.
        """
        print('Calling detect_loaded_datasets')
        if local_path is None:
            local_path = ConfigConstants.DATA_SET_PATH

        # isdir (rather than exists) so that a regular file at this path
        # yields an empty result instead of making os.listdir raise.
        if not os.path.isdir(local_path):
            return set()

        suffix = "_test.pkl"
        # Slice off the trailing suffix only; str.replace would also remove
        # any earlier occurrence of the same text inside the name.
        return {
            entry[:-len(suffix)]
            for entry in os.listdir(local_path)
            if entry.endswith(suffix)
        }
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
class ConfigConstants:
    """Constants related to datasets and models.

    A plain namespace of class-level attributes; nothing here is computed at
    runtime.
    """

    # Directory that AppConfig.detect_loaded_datasets scans for "*_test.pkl" files.
    DATA_SET_PATH = '/home/user/'

    # Dataset identifiers known to the application.
    DATA_SET_NAMES = [
        'covidqa', 'cuad', 'techqa', 'delucionqa', 'emanual', 'expertqa',
        'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa',
    ]

    # Model identifiers; how they are resolved is decided by the callers.
    EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"
    RE_RANKER_MODEL_NAME = 'cross-encoder/ms-marco-electra-base'

    # Default generation / validation models; each also appears in the
    # corresponding list of selectable models below.
    GENERATION_MODEL_NAME = 'mixtral-8x7b-32768'
    VALIDATION_MODEL_NAME = 'llama3-70b-8192'
    GENERATION_MODELS = ["llama3-8b-8192", "qwen-2.5-32b", "mixtral-8x7b-32768", "gemma2-9b-it"]
    VALIDATION_MODELS = ["llama3-70b-8192", "deepseek-r1-distill-llama-70b"]

    # Text chunking parameters.
    # NOTE(review): units (characters vs. tokens) are not visible here -- confirm
    # against the code that performs the splitting.
    DEFAULT_CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200
|
| 16 |
+
|
| 17 |
+
class AppConfig:
    """Container for the application's shared runtime objects.

    Keeps the three objects handed to the constructor and the set of dataset
    names discovered on disk when the instance is created.
    """

    def __init__(self, vector_store, gen_llm, val_llm):
        """Store the supplied objects and detect locally available datasets.

        Args:
            vector_store: Stored unchanged on ``self.vector_store``.
            gen_llm: Stored unchanged on ``self.gen_llm``.
            val_llm: Stored unchanged on ``self.val_llm``.
        """
        self.vector_store = vector_store
        self.gen_llm = gen_llm
        self.val_llm = val_llm
        self.loaded_datasets = self.detect_loaded_datasets()  # Auto-detect loaded datasets

    @staticmethod
    def detect_loaded_datasets(local_path=None):
        """Check which datasets are already stored locally.

        A dataset counts as loaded when the directory contains a file named
        ``<dataset>_test.pkl``. Only file names are inspected; the files are
        never opened.

        Args:
            local_path: Directory to scan. Defaults to
                ``ConfigConstants.DATA_SET_PATH`` when omitted.

        Returns:
            set[str]: The ``<dataset>`` prefixes found. Empty when
            ``local_path`` is missing or is not a directory.
        """
        print('Calling detect_loaded_datasets')
        if local_path is None:
            local_path = ConfigConstants.DATA_SET_PATH

        # isdir (rather than exists) so that a regular file at this path
        # yields an empty result instead of making os.listdir raise.
        if not os.path.isdir(local_path):
            return set()

        suffix = "_test.pkl"
        # Slice off the trailing suffix only; str.replace would also remove
        # any earlier occurrence of the same text inside the name.
        return {
            entry[:-len(suffix)]
            for entry in os.listdir(local_path)
            if entry.endswith(suffix)
        }
|