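# NOTE(review): page-capture residue, not code -- appears to be a hosting
# status banner that read "Spaces: Sleeping / Sleeping".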
import os

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
# from langchain_huggingface import HuggingFaceHubEmbeddings

from rag_app.database.db_handler import DataBaseHandler
# Load variables from a .env file into the process environment so that the
# os.getenv() lookups in this module can see them.
load_dotenv()
# Runtime configuration read from environment variables.
# os.getenv() returns None for a variable that is not set, so every constant
# below may be None; nothing in this section validates the values.

# File and index locations (judging by the variable names).
SQLITE_FILE_NAME = os.getenv("SOURCES_CACHE")
VECTOR_DATABASE_LOCATION = os.getenv("VECTOR_DATABASE_LOCATION")
FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH")

# Model identifiers.
# NOTE: SEVEN_B_LLM_MODEL is rebound further down this module to the
# HuggingFaceEndpoint instance that is built from this string.
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
SEVEN_B_LLM_MODEL = os.getenv("SEVEN_B_LLM_MODEL")
BERT_MODEL = os.getenv("BERT_MODEL")

# Access token for the Hugging Face Hub.
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Embedding client shared by the application, built for the model named by
# EMBEDDING_MODEL.
# Disabled alternative, kept for reference:
#   embeddings = HuggingFaceHubEmbeddings(repo_id=EMBEDDING_MODEL)
model_kwargs = {"device": "cpu"}  # passed through as the client's model_kwargs
encode_kwargs = {"normalize_embeddings": False}  # passed through as encode_kwargs
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
# Create the project's database handler and call create_all_tables() on it.
# Both statements run as import-time side effects of this module.
db = DataBaseHandler()
db.create_all_tables()
# This model is used for tasks that do not need a larger model. At the time
# of writing we have been getting MODEL OVERLOADED errors from Hugging Face.
# NOTE: this rebinds SEVEN_B_LLM_MODEL from the repo-id string read from the
# environment to the HuggingFaceEndpoint instance, so importers of this name
# receive the endpoint object.
SEVEN_B_LLM_MODEL = HuggingFaceEndpoint(
    repo_id=SEVEN_B_LLM_MODEL,
    # Controls randomness in response generation (lower value means less random).
    temperature=0.1,
    # Maximum number of new tokens to generate in responses.
    max_new_tokens=1024,
    # Penalty for repeating the same words (higher value increases penalty).
    repetition_penalty=1.2,
    # If False, only the newly generated text is returned; if True, the input
    # is included as well.
    return_full_text=False,
)