Spaces:

sabazo
/

insurance_advisor_wb

Sleeping

App Files Files Community

insurance_advisor_wb / config.py

isayahc

done more refactoring

47feab3 unverified over 1 year ago

raw

history blame contribute delete

1.64 kB

	import os
	from dotenv import load_dotenv
	from rag_app.database.db_handler import DataBaseHandler
	from langchain_huggingface import HuggingFaceEndpoint
	# from langchain_huggingface import HuggingFaceHubEmbeddings
	from langchain_huggingface import HuggingFaceEmbeddings

	# Run load_dotenv() before any lookup below so that values supplied through
	# a .env file are visible in the process environment.
	load_dotenv()

	# Path-like settings (per their names). Each is None when the variable is unset.
	SQLITE_FILE_NAME = os.environ.get("SOURCES_CACHE")
	VECTOR_DATABASE_LOCATION = os.environ.get("VECTOR_DATABASE_LOCATION")
	FAISS_INDEX_PATH = os.environ.get("FAISS_INDEX_PATH")

	# Model identifiers. Each is None when the variable is unset.
	EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL")
	SEVEN_B_LLM_MODEL = os.environ.get("SEVEN_B_LLM_MODEL")
	BERT_MODEL = os.environ.get("BERT_MODEL")

	# Hugging Face Hub API token; None when the variable is unset.
	HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")



	# embeddings = HuggingFaceHubEmbeddings(repo_id=EMBEDDING_MODEL)

	# Shared embedding object, built once at import time from EMBEDDING_MODEL.
	# model_kwargs requests the CPU device; encode_kwargs leaves the embedding
	# vectors un-normalized.
	model_kwargs = dict(device="cpu")
	encode_kwargs = dict(normalize_embeddings=False)
	embeddings = HuggingFaceEmbeddings(
	    encode_kwargs=encode_kwargs,
	    model_kwargs=model_kwargs,
	    model_name=EMBEDDING_MODEL,
	)

	# Module-level database handler (from rag_app.database.db_handler), created
	# once when this config module is imported.
	db = DataBaseHandler()

	# Runs on every import of this module.
	# NOTE(review): presumably create_all_tables() is safe to call when the
	# tables already exist — confirm in DataBaseHandler.
	db.create_all_tables()

	# Endpoint for the tasks that do not need a larger model. At the time of
	# writing, Hugging Face has been returning MODEL OVERLOADED errors.
	# NOTE: this rebinds SEVEN_B_LLM_MODEL from the repo id string read from the
	# environment to the endpoint object built from that repo id.
	SEVEN_B_LLM_MODEL = HuggingFaceEndpoint(
	    repo_id=SEVEN_B_LLM_MODEL,
	    # Return only the newly generated text; True would include the input too.
	    return_full_text=False,
	    # Upper bound on the number of new tokens generated per response.
	    max_new_tokens=1024,
	    # Lower temperature means less randomness in the generated response.
	    temperature=0.1,
	    # A higher value penalises repeating the same words more strongly.
	    repetition_penalty=1.2,
	)