|
import multiprocessing |
|
from langchain.docstore.document import Document as LangChainDocument |
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from langchain_community.vectorstores import FAISS |
|
from huggingface_hub import login |
|
from loguru import logger |
|
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
|
import os |
|
from dotenv import load_dotenv |
|
|
|
|
|
def main(): |
|
load_dotenv() |
|
    logger.info('Loading the file the RAG will be based on.')
|
    with open('train.txt', 'r', encoding='utf-8') as f:
|
data = f.read() |
|
|
|
    logger.info('Representing the document as a LangChainDocument.')
|
raw_database = LangChainDocument(page_content=data) |
|
|
|
MARKDOWN_SEPARATORS = [ |
|
"\n#{1,6} ", |
|
"```\n", |
|
"\n\\*\\*\\*+\n", |
|
"\n---+\n", |
|
"\n___+\n", |
|
"\n\n", |
|
"\n", |
|
" ", |
|
"", |
|
] |
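    # NOTE: several of these separators look like regex patterns (e.g. "\n#{1,6} " for
    # Markdown headings), but RecursiveCharacterTextSplitter treats separators as plain
    # text unless is_separator_regex=True is passed, so the split effectively falls back
    # to the literal separators ("```\n", "\n\n", "\n", " ", "").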
|
|
|
    logger.info('Splitting the document into chunks.')
|
splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100) |
|
process_data = splitter.split_documents([raw_database]) |
|
process_data = process_data[:5] |
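    # Only the first 5 chunks are kept, presumably to keep this example fast;
    # remove the line above to index the whole document.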
|
|
|
embedding_model_name = "thenlper/gte-small" |
|
    logger.info(f'Setting the embedding model: {embedding_model_name}.')
|
embedding_model = HuggingFaceEmbeddings( |
|
model_name=embedding_model_name, |
|
multi_process=True, |
|
model_kwargs={"device": "cpu"}, |
|
encode_kwargs={"normalize_embeddings": True}, |
|
) |
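    # multi_process=True makes sentence-transformers spawn worker processes for encoding,
    # which is why everything in this script runs under the `if __name__ == '__main__'`
    # guard at the bottom of the file.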
|
|
|
    logger.info('Creating the (in-memory) vector database.')
|
vectors = FAISS.from_documents(process_data, embedding_model) |
|
|
|
|
|
model_name = "HuggingFaceH4/zephyr-7b-beta" |
|
|
|
|
|
    logger.info(f'Loading the main language model: {model_name}')
|
|
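    # BitsAndBytesConfig and torch are imported above but never used, which suggests the
    # model was meant to be loaded with 4-bit quantization. The settings below are an
    # assumed, typical NF4 setup: they require a CUDA GPU with bitsandbytes installed and
    # are skipped (bnb_config stays None) on CPU-only machines.
    bnb_config = None
    if torch.cuda.is_available():
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )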
    model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
|
tokenizer = AutoTokenizer.from_pretrained(model_name) |
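    # The pipeline below wraps the model and tokenizer for text generation. With
    # return_full_text=False the pipeline returns only the newly generated tokens instead
    # of echoing the prompt, and do_sample with a low temperature keeps answers focused
    # while still allowing some variation.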
|
|
|
llm_model = pipeline( |
|
model=model, |
|
tokenizer=tokenizer, |
|
task="text-generation", |
|
do_sample=True, |
|
temperature=0.4, |
|
repetition_penalty=1.1, |
|
return_full_text=False, |
|
max_new_tokens=500 |
|
) |
|
    logger.info(f'Model {model_name} loaded successfully.')
|
|
|
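    # The <|system|> / <|user|> / <|assistant|> tags follow Zephyr's chat format; the two
    # "{}" placeholders are filled later via str.format with the retrieved context and the
    # question, in that order.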
prompt = """ |
|
<|system|> |
|
You are a helpful assistant that answers medical questions based on real information provided from different sources in the context.

Give a rational, well-written response. If the context does not contain the information you need, answer "I don't know".
|
Respond only to the question asked. |
|
|
|
<|user|> |
|
Context: |
|
{} |
|
--- |
|
Here is the question you need to answer. |
|
|
|
Question: {} |
|
--- |
|
<|assistant|> |
|
""" |
|
|
|
question = "What is Cardiogenic shock?" |
|
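    # Retrieve the 3 chunks whose embeddings are closest to the question's embedding.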
search_results = vectors.similarity_search(question, k=3) |
|
|
|
    logger.info('Context:')
|
for i, search_result in enumerate(search_results): |
|
logger.info(f"{i + 1}) {search_result.page_content}") |
|
|
|
context = " ".join([search_result.page_content for search_result in search_results]) |
|
final_prompt = prompt.format(context, question) |
|
logger.info(f'\n{final_prompt}\n') |
|
|
|
answer = llm_model(final_prompt) |
|
|
|
    logger.info(f"AI response: {answer[0]['generated_text']}")
|
|
|
|
|
if __name__ == '__main__': |
|
multiprocessing.freeze_support() |
|
access_token = os.getenv("ACCESS_TOKEN") |
|
login(token=access_token) |
|
    logger.info('Login successful.')
|
main() |
|
|