iasbeck committed
Commit a21dabb · 1 Parent(s): 24542f2

Creation of the Rag class and update of app.py.
Files changed (6)
  1. app.py +5 -1
  2. app_bkp.py +0 -136
  3. app_echo.py +24 -0
  4. rag.py +158 -0
  5. rag_test.py +82 -42
  6. rag_test_bkp.py +119 -0
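
In short, this commit swaps the hard-coded echo reply in app.py for a call into the new Rag class, so the Streamlit chat now answers from the retrieval pipeline defined in rag.py. A minimal usage sketch of that flow (names taken from the diffs below; per rag.py, a local train.txt, an HF_TOKEN environment variable and a CUDA device for the embeddings are assumed):

from rag import Rag

rag = Rag()  # cheap to construct; the FAISS index and the zephyr-7b-beta pipeline are built lazily
answer = rag.get_answer("What is Cardiogenic shock?")  # retrieves k=3 chunks, then prompts the LLM
print(answer)
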
app.py CHANGED
@@ -1,6 +1,9 @@
import streamlit as st

+ from rag import Rag
+
st.title('Echo Bot')
+ rag = Rag()

if "messages" not in st.session_state:
    st.session_state.messages = []
@@ -16,7 +19,8 @@ if prompt:

    st.session_state.messages.append({'role': 'user', 'content': prompt})

-     response = f'**Echo**: {prompt}'
+     # response = f'**Echo**: {prompt}'
+     response = f'{rag.get_answer(prompt)}'

    with st.chat_message('assistant'):
        st.markdown(response)
app_bkp.py DELETED
@@ -1,136 +0,0 @@
- import streamlit as st
- import multiprocessing
- from langchain.docstore.document import Document as LangChainDocument
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_community.vectorstores import FAISS
- from huggingface_hub import login
- from loguru import logger
- import os
- from dotenv import load_dotenv
-
- multiprocessing.freeze_support()
- load_dotenv()
- access_token = os.getenv("ACCESS_TOKEN")
- login(token=access_token)
- logger.info('Login realizado com sucesso.')
-
- logger.info('Carregando arquivo no qual será baseado o RAG.')
- with open('train.txt', 'r') as f:
-     data = f.read()
-
- logger.info('Representando o documento utilizando o LangChainDocument.')
- raw_database = LangChainDocument(page_content=data)
-
- MARKDOWN_SEPARATORS = [
-     "\n#{1,6} ",
-     "```\n",
-     "\n\\*\\*\\*+\n",
-     "\n---+\n",
-     "\n___+\n",
-     "\n\n",
-     "\n",
-     " ",
-     "",
- ]
-
- logger.info('Quebrando o documento para a criação dos chunks.')
- splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100)
- process_data = splitter.split_documents([raw_database])
- process_data = process_data[:5]  # TODO: REMOVER DEPOIS
-
- embedding_model_name = "thenlper/gte-small"
- logger.info(f'Definição do modelo de embeddings: {embedding_model_name}.')
- embedding_model = HuggingFaceEmbeddings(
-     model_name=embedding_model_name,
-     multi_process=True,
-     model_kwargs={"device": "cuda"},
-     encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
- )
-
- logger.info('Criação da base de dados vetorial (em memória).')
- vectors = FAISS.from_documents(process_data, embedding_model)
-
- from transformers import pipeline
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
- # model_name = "meta-llama/Llama-3.2-1B"
- model_name = "HuggingFaceH4/zephyr-7b-beta"
- # model_name = "mistralai/Mistral-7B-Instruct-v0.3"
- # model_name = "meta-llama/Llama-3.2-3B-Instruct"
- logger.info(f'Carregamento do modelo de linguagem principal: {model_name}')
-
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch.bfloat16,
- )
- model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
- tokenizer = AutoTokenizer.from_pretrained(model_name)
-
- llm_model = pipeline(
-     model=model,
-     tokenizer=tokenizer,
-     task="text-generation",
-     do_sample=True,
-     temperature=0.4,
-     repetition_penalty=1.1,
-     return_full_text=False,
-     max_new_tokens=500
- )
- logger.info(f'Modelo {model_name} carregado com sucesso.')
-
- prompt = """
- <|system|>
- You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
- Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
- Respond only to the question asked.
-
- <|user|>
- Context:
- {}
- ---
- Here is the question you need to answer.
-
- Question: {}
- ---
- <|assistant|>
- """
-
- st.title("Echo Bot")
-
- if "messages" not in st.session_state:
-     st.session_state.messages = []
-
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
-
- question = st.chat_input("How can I help you?")
- if question:
-     with st.chat_message("user"):
-         st.markdown(prompt)
-
-     st.session_state.messages.append({"role": "user", "content": prompt})
-
-     search_results = vectors.similarity_search(question, k=3)
-
-     logger.info('Contexto: ')
-     for i, search_result in enumerate(search_results):
-         logger.info(f"{i + 1}) {search_result.page_content}")
-
-     context = " ".join([search_result.page_content for search_result in search_results])
-     final_prompt = prompt.format(context, question)
-     logger.info(f'\n{final_prompt}\n')
-
-     answer = llm_model(final_prompt)
-     text_answer = answer[0]['generated_text']
-
-     logger.info("AI response: ", text_answer)
-
-     with st.chat_message("assistant"):
-         st.markdown(text_answer)
-
-     st.session_state.messages.append({"role": "assistant", "content": text_answer})
app_echo.py ADDED
@@ -0,0 +1,24 @@
+ import streamlit as st
+
+ st.title('Echo Bot')
+
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ for message in st.session_state.messages:
+     with st.chat_message(message['role']):
+         st.markdown(message['content'])
+
+ prompt = st.chat_input('How can I help you?')
+ if prompt:
+     with st.chat_message('user'):
+         st.markdown(prompt)
+
+     st.session_state.messages.append({'role': 'user', 'content': prompt})
+
+     response = f'**Echo**: {prompt}'
+
+     with st.chat_message('assistant'):
+         st.markdown(response)
+
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
rag.py ADDED
@@ -0,0 +1,158 @@
+ import multiprocessing
+ import time
+
+ from langchain.docstore.document import Document as LangChainDocument
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from huggingface_hub import login
+ from loguru import logger
+ from transformers import pipeline
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import os
+ from dotenv import load_dotenv
+
+ class Rag:
+     def __init__(self):
+         self.vectors = None
+         self.raw_database = None
+         self.process_data = None
+         self.embedding_model = None
+         self.llm_model = None
+
+         self.data_file_name = 'train.txt'
+         self.embedding_model_name = "thenlper/gte-small"
+         self.model_name = "HuggingFaceH4/zephyr-7b-beta"
+
+         multiprocessing.freeze_support()
+
+     def build_vector_database(self):
+         if self.vectors is None:
+             self.load_document()
+             self.generate_chunks()
+
+             logger.info('Criação da base de dados vetorial (em memória).')
+             self.vectors = FAISS.from_documents(self.process_data, self.embedding_model)
+
+     def load_document(self):
+         logger.info('Carregando arquivo no qual será baseado o RAG.')
+         with open(self.data_file_name, 'r') as f:
+             data = f.read()
+
+         logger.info('Representando o documento utilizando o LangChainDocument.')
+         self.raw_database = LangChainDocument(page_content=data)
+
+     def generate_chunks(self):
+         MARKDOWN_SEPARATORS = [
+             "\n#{1,6} ",
+             "```\n",
+             "\n\\*\\*\\*+\n",
+             "\n---+\n",
+             "\n___+\n",
+             "\n\n",
+             "\n",
+             " ",
+             "",
+         ]
+
+         logger.info('Quebrando o documento para a criação dos chunks.')
+         splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000,
+                                                   chunk_overlap=100)
+         self.process_data = splitter.split_documents([self.raw_database])
+         self.process_data = self.process_data[:5]  # TODO: REMOVER DEPOIS
+
+
+         logger.info(f'Definição do modelo de embeddings: {self.embedding_model_name}.')
+         self.embedding_model = HuggingFaceEmbeddings(
+             model_name=self.embedding_model_name,
+             multi_process=True,
+             model_kwargs={"device": "cuda"},  # TODO: AJUSTAR DEPOIS
+             encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
+         )
+
+     def load_model(self):
+         if self.llm_model is None:
+             load_dotenv()
+             login(token=os.getenv('HF_TOKEN'))
+             time.sleep(2)
+
+             logger.info(f'Carregamento do modelo de linguagem principal: {self.model_name}')
+
+             bnb_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_use_double_quant=True,
+                 bnb_4bit_quant_type="nf4",
+                 bnb_4bit_compute_dtype=torch.bfloat16,
+             )
+             model = AutoModelForCausalLM.from_pretrained(self.model_name, quantization_config=bnb_config)
+             tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+
+             self.llm_model = pipeline(
+                 model=model,
+                 tokenizer=tokenizer,
+                 task="text-generation",
+                 do_sample=True,
+                 temperature=0.4,
+                 repetition_penalty=1.1,
+                 return_full_text=False,
+                 max_new_tokens=500
+             )
+             logger.info(f'Modelo {self.model_name} carregado com sucesso.')
+
+     def get_answer(self, question, use_context=True):
+         self.build_vector_database()
+         self.load_model()
+
+         if use_context:
+             prompt = """
+             <|system|>
+             You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+             Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+             Respond only to the question asked.
+
+             <|user|>
+             Context:
+             {}
+             ---
+             Here is the question you need to answer.
+
+             Question: {}
+             ---
+             <|assistant|>
+             """
+
+             search_results = self.vectors.similarity_search(question, k=3)
+             logger.info('Contexto: ')
+             for i, search_result in enumerate(search_results):
+                 logger.info(f"{i + 1}) {search_result.page_content}")
+
+             context = " ".join([search_result.page_content for search_result in search_results])
+
+             final_prompt = prompt.format(context, question)
+             logger.info(f'Prompt final: \n{final_prompt}\n')
+             answer = self.llm_model(final_prompt)
+             logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+         else:
+             prompt = """
+             <|system|>
+             You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+             Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+             Respond only to the question asked.
+
+             <|user|>
+             ---
+             Here is the question you need to answer.
+
+             Question: {}
+             ---
+             <|assistant|>
+             """
+
+             final_prompt = prompt.format(question)
+             logger.info(f'Prompt final: \n{final_prompt}\n')
+             answer = self.llm_model(final_prompt)
+             logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+         return answer[0]['generated_text']
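
A note on the design above: build_vector_database() and load_model() are guarded by "if self.vectors is None" and "if self.llm_model is None", so the gte-small embeddings, the in-memory FAISS index and the 4-bit quantized zephyr-7b-beta pipeline are only created on the first get_answer() call and reused afterwards. get_answer() also accepts use_context=False, which skips the similarity search and prompts the model with the question alone, e.g. (sketch, reusing a Rag instance as in the snippet near the top of this page):

answer = rag.get_answer("What is Cardiogenic shock?", use_context=False)  # no retrieval, question only
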
rag_test.py CHANGED
@@ -1,4 +1,6 @@
import multiprocessing
+ import time
+
from langchain.docstore.document import Document as LangChainDocument
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
@@ -11,16 +13,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
from dotenv import load_dotenv

+ vector_database_builded = False

- def main():
-     load_dotenv()
+
+ def load_document():
    logger.info('Carregando arquivo no qual será baseado o RAG.')
    with open('train.txt', 'r') as f:
        data = f.read()

    logger.info('Representando o documento utilizando o LangChainDocument.')
    raw_database = LangChainDocument(page_content=data)
+     return raw_database

+
+ def generate_chunks(raw_database):
    MARKDOWN_SEPARATORS = [
        "\n#{1,6} ",
        "```\n",
@@ -43,12 +49,26 @@ def main():
    embedding_model = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        multi_process=True,
-         model_kwargs={"device": "cuda"},
+         model_kwargs={"device": "cuda"},  # TODO: AJUSTAR DEPOIS
        encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
    )

+     return process_data, embedding_model
+
+
+ def build_vector_database():
+     raw_database = load_document()
+     process_data, embedding_model = generate_chunks(raw_database)
+
    logger.info('Criação da base de dados vetorial (em memória).')
    vectors = FAISS.from_documents(process_data, embedding_model)
+     return vectors
+
+
+ def load_model():
+     load_dotenv()
+     login(token=os.getenv('HF_TOKEN'))
+     time.sleep(2)

    # model_name = "meta-llama/Llama-3.2-1B"
    model_name = "HuggingFaceH4/zephyr-7b-beta"
@@ -77,42 +97,62 @@ def main():
    )
    logger.info(f'Modelo {model_name} carregado com sucesso.')

-     prompt = """
-     <|system|>
-     You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
-     Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
-     Respond only to the question asked.
-
-     <|user|>
-     Context:
-     {}
-     ---
-     Here is the question you need to answer.
-
-     Question: {}
-     ---
-     <|assistant|>
-     """
-
-     question = "What is Cardiogenic shock?"
-     search_results = vectors.similarity_search(question, k=3)
-
-     logger.info('Contexto: ')
-     for i, search_result in enumerate(search_results):
-         logger.info(f"{i + 1}) {search_result.page_content}")
-
-     context = " ".join([search_result.page_content for search_result in search_results])
-     final_prompt = prompt.format(context, question)
-     logger.info(f'\n{final_prompt}\n')
-
-     answer = llm_model(final_prompt)
-
-     logger.info("AI response: ", answer[0]['generated_text'])
-
-
- if __name__ == '__main__':
-     multiprocessing.freeze_support()
-     access_token = os.getenv("ACCESS_TOKEN")
-     login(token=access_token)
-     logger.info('Login realizado com sucesso.')
-     main()
+     return llm_model
+
+
+ def get_answer(question, use_context=True):
+     vectors = build_vector_database()
+     llm_model = load_model()
+
+     if use_context:
+         prompt = """
+         <|system|>
+         You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+         Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+         Respond only to the question asked.
+
+         <|user|>
+         Context:
+         {}
+         ---
+         Here is the question you need to answer.
+
+         Question: {}
+         ---
+         <|assistant|>
+         """
+
+         search_results = vectors.similarity_search(question, k=3)
+         logger.info('Contexto: ')
+         for i, search_result in enumerate(search_results):
+             logger.info(f"{i + 1}) {search_result.page_content}")
+
+         context = " ".join([search_result.page_content for search_result in search_results])
+
+         final_prompt = prompt.format(context, question)
+         logger.info(f'Prompt final: \n{final_prompt}\n')
+         answer = llm_model(final_prompt)
+         logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+     else:
+         prompt = """
+         <|system|>
+         You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+         Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+         Respond only to the question asked.
+
+         <|user|>
+         ---
+         Here is the question you need to answer.
+
+         Question: {}
+         ---
+         <|assistant|>
+         """
+
+         final_prompt = prompt.format(question)
+         logger.info(f'Prompt final: \n{final_prompt}\n')
+         answer = llm_model(final_prompt)
+         logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+     return answer[0]['generated_text']
rag_test_bkp.py ADDED
@@ -0,0 +1,119 @@
+ import multiprocessing
+ from langchain.docstore.document import Document as LangChainDocument
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from huggingface_hub import login
+ from loguru import logger
+ from transformers import pipeline
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import os
+ from dotenv import load_dotenv
+
+
+ def main():
+     load_dotenv()
+     logger.info('Carregando arquivo no qual será baseado o RAG.')
+     with open('train.txt', 'r') as f:
+         data = f.read()
+
+     logger.info('Representando o documento utilizando o LangChainDocument.')
+     raw_database = LangChainDocument(page_content=data)
+
+     MARKDOWN_SEPARATORS = [
+         "\n#{1,6} ",
+         "```\n",
+         "\n\\*\\*\\*+\n",
+         "\n---+\n",
+         "\n___+\n",
+         "\n\n",
+         "\n",
+         " ",
+         "",
+     ]
+
+     logger.info('Quebrando o documento para a criação dos chunks.')
+     splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100)
+     process_data = splitter.split_documents([raw_database])
+     process_data = process_data[:5]  # TODO: REMOVER DEPOIS
+
+     embedding_model_name = "thenlper/gte-small"
+     logger.info(f'Definição do modelo de embeddings: {embedding_model_name}.')
+     embedding_model = HuggingFaceEmbeddings(
+         model_name=embedding_model_name,
+         multi_process=True,
+         model_kwargs={"device": "cpu"},  # TODO: AJUSTAR DEPOIS
+         encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
+     )
+
+     logger.info('Criação da base de dados vetorial (em memória).')
+     vectors = FAISS.from_documents(process_data, embedding_model)
+
+     # model_name = "meta-llama/Llama-3.2-1B"
+     model_name = "HuggingFaceH4/zephyr-7b-beta"
+     # model_name = "mistralai/Mistral-7B-Instruct-v0.3"
+     # model_name = "meta-llama/Llama-3.2-3B-Instruct"
+     logger.info(f'Carregamento do modelo de linguagem principal: {model_name}')
+
+     # bnb_config = BitsAndBytesConfig(
+     #     load_in_4bit=True,
+     #     bnb_4bit_use_double_quant=True,
+     #     bnb_4bit_quant_type="nf4",
+     #     bnb_4bit_compute_dtype=torch.bfloat16,
+     # )
+     # model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
+     model = AutoModelForCausalLM.from_pretrained(model_name)
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+     llm_model = pipeline(
+         model=model,
+         tokenizer=tokenizer,
+         task="text-generation",
+         do_sample=True,
+         temperature=0.4,
+         repetition_penalty=1.1,
+         return_full_text=False,
+         max_new_tokens=500
+     )
+     logger.info(f'Modelo {model_name} carregado com sucesso.')
+
+     prompt = """
+     <|system|>
+     You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+     Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+     Respond only to the question asked.
+
+     <|user|>
+     Context:
+     {}
+     ---
+     Here is the question you need to answer.
+
+     Question: {}
+     ---
+     <|assistant|>
+     """
+
+     question = "What is Cardiogenic shock?"
+     search_results = vectors.similarity_search(question, k=3)
+
+     logger.info('Contexto: ')
+     for i, search_result in enumerate(search_results):
+         logger.info(f"{i + 1}) {search_result.page_content}")
+
+     context = " ".join([search_result.page_content for search_result in search_results])
+     final_prompt = prompt.format(context, question)
+     logger.info(f'\n{final_prompt}\n')
+
+     answer = llm_model(final_prompt)
+
+     logger.info("AI response: ", answer[0]['generated_text'])
+
+
+ if __name__ == '__main__':
+     multiprocessing.freeze_support()
+     access_token = os.getenv("ACCESS_TOKEN")
+     login(token=access_token)
+     logger.info('Login realizado com sucesso.')
+     main()