iasbeck committed
Commit 24542f2 · Parent: a4f5679

Restoration of the Echo Bot.

Files changed (2)
  1. app.py +11 -123
  2. app_bkp.py +136 -0
app.py CHANGED
@@ -1,136 +1,24 @@
  import streamlit as st
- import multiprocessing
- from langchain.docstore.document import Document as LangChainDocument
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_community.vectorstores import FAISS
- from huggingface_hub import login
- from loguru import logger
- import os
- from dotenv import load_dotenv

- multiprocessing.freeze_support()
- load_dotenv()
- access_token = os.getenv("ACCESS_TOKEN")
- login(token=access_token)
- logger.info('Login realizado com sucesso.')
-
- logger.info('Carregando arquivo no qual será baseado o RAG.')
- with open('train.txt', 'r') as f:
-     data = f.read()
-
- logger.info('Representando o documento utilizando o LangChainDocument.')
- raw_database = LangChainDocument(page_content=data)
-
- MARKDOWN_SEPARATORS = [
-     "\n#{1,6} ",
-     "```\n",
-     "\n\\*\\*\\*+\n",
-     "\n---+\n",
-     "\n___+\n",
-     "\n\n",
-     "\n",
-     " ",
-     "",
- ]
-
- logger.info('Quebrando o documento para a criação dos chunks.')
- splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100)
- process_data = splitter.split_documents([raw_database])
- process_data = process_data[:5] # TODO: REMOVER DEPOIS
-
- embedding_model_name = "thenlper/gte-small"
- logger.info(f'Definição do modelo de embeddings: {embedding_model_name}.')
- embedding_model = HuggingFaceEmbeddings(
-     model_name=embedding_model_name,
-     multi_process=True,
-     model_kwargs={"device": "cuda"},
-     encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
- )
-
- logger.info('Criação da base de dados vetorial (em memória).')
- vectors = FAISS.from_documents(process_data, embedding_model)
-
- from transformers import pipeline
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
- # model_name = "meta-llama/Llama-3.2-1B"
- model_name = "HuggingFaceH4/zephyr-7b-beta"
- # model_name = "mistralai/Mistral-7B-Instruct-v0.3"
- # model_name = "meta-llama/Llama-3.2-3B-Instruct"
- logger.info(f'Carregamento do modelo de linguagem principal: {model_name}')
-
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch.bfloat16,
- )
- model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
- tokenizer = AutoTokenizer.from_pretrained(model_name)
-
- llm_model = pipeline(
-     model=model,
-     tokenizer=tokenizer,
-     task="text-generation",
-     do_sample=True,
-     temperature=0.4,
-     repetition_penalty=1.1,
-     return_full_text=False,
-     max_new_tokens=500
- )
- logger.info(f'Modelo {model_name} carregado com sucesso.')
-
- prompt = """
- <|system|>
- You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
- Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
- Respond only to the question asked.
-
- <|user|>
- Context:
- {}
- ---
- Here is the question you need to answer.
-
- Question: {}
- ---
- <|assistant|>
- """
-
- st.title("Echo Bot")
+ st.title('Echo Bot')

  if "messages" not in st.session_state:
      st.session_state.messages = []

  for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
+     with st.chat_message(message['role']):
+         st.markdown(message['content'])

- question = st.chat_input("How can I help you?")
- if question:
-     with st.chat_message("user"):
+ prompt = st.chat_input('How can I help you?')
+ if prompt:
+     with st.chat_message('user'):
          st.markdown(prompt)

-     st.session_state.messages.append({"role": "user", "content": prompt})
-
-     search_results = vectors.similarity_search(question, k=3)
-
-     logger.info('Contexto: ')
-     for i, search_result in enumerate(search_results):
-         logger.info(f"{i + 1}) {search_result.page_content}")
-
-     context = " ".join([search_result.page_content for search_result in search_results])
-     final_prompt = prompt.format(context, question)
-     logger.info(f'\n{final_prompt}\n')
-
-     answer = llm_model(final_prompt)
-     text_answer = answer[0]['generated_text']
+     st.session_state.messages.append({'role': 'user', 'content': prompt})

-     logger.info("AI response: ", text_answer)
+     response = f'**Echo**: {prompt}'

-     with st.chat_message("assistant"):
-         st.markdown(text_answer)
+     with st.chat_message('assistant'):
+         st.markdown(response)

-     st.session_state.messages.append({"role": "assistant", "content": text_answer})
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
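
Note (not part of the commit): the restored app.py is Streamlit's standard chat-echo loop. `st.chat_input` returns `None` until the user submits a message, so the `if prompt:` block runs once per turn, and the history kept in `st.session_state.messages` is replayed at the top of every rerun. The app can be tried locally with `streamlit run app.py`.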
app_bkp.py ADDED
@@ -0,0 +1,136 @@
+ import streamlit as st
+ import multiprocessing
+ from langchain.docstore.document import Document as LangChainDocument
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from huggingface_hub import login
+ from loguru import logger
+ import os
+ from dotenv import load_dotenv
+
+ multiprocessing.freeze_support()
+ load_dotenv()
+ access_token = os.getenv("ACCESS_TOKEN")
+ login(token=access_token)
+ logger.info('Login realizado com sucesso.')
+
+ logger.info('Carregando arquivo no qual será baseado o RAG.')
+ with open('train.txt', 'r') as f:
+     data = f.read()
+
+ logger.info('Representando o documento utilizando o LangChainDocument.')
+ raw_database = LangChainDocument(page_content=data)
+
+ MARKDOWN_SEPARATORS = [
+     "\n#{1,6} ",
+     "```\n",
+     "\n\\*\\*\\*+\n",
+     "\n---+\n",
+     "\n___+\n",
+     "\n\n",
+     "\n",
+     " ",
+     "",
+ ]
+
+ logger.info('Quebrando o documento para a criação dos chunks.')
+ splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100)
+ process_data = splitter.split_documents([raw_database])
+ process_data = process_data[:5] # TODO: REMOVER DEPOIS
+
+ embedding_model_name = "thenlper/gte-small"
+ logger.info(f'Definição do modelo de embeddings: {embedding_model_name}.')
+ embedding_model = HuggingFaceEmbeddings(
+     model_name=embedding_model_name,
+     multi_process=True,
+     model_kwargs={"device": "cuda"},
+     encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
+ )
+
+ logger.info('Criação da base de dados vetorial (em memória).')
+ vectors = FAISS.from_documents(process_data, embedding_model)
+
+ from transformers import pipeline
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+
+ # model_name = "meta-llama/Llama-3.2-1B"
+ model_name = "HuggingFaceH4/zephyr-7b-beta"
+ # model_name = "mistralai/Mistral-7B-Instruct-v0.3"
+ # model_name = "meta-llama/Llama-3.2-3B-Instruct"
+ logger.info(f'Carregamento do modelo de linguagem principal: {model_name}')
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+ model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ llm_model = pipeline(
+     model=model,
+     tokenizer=tokenizer,
+     task="text-generation",
+     do_sample=True,
+     temperature=0.4,
+     repetition_penalty=1.1,
+     return_full_text=False,
+     max_new_tokens=500
+ )
+ logger.info(f'Modelo {model_name} carregado com sucesso.')
+
+ prompt = """
+ <|system|>
+ You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+ Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+ Respond only to the question asked.
+
+ <|user|>
+ Context:
+ {}
+ ---
+ Here is the question you need to answer.
+
+ Question: {}
+ ---
+ <|assistant|>
+ """
+
+ st.title("Echo Bot")
+
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ question = st.chat_input("How can I help you?")
+ if question:
+     with st.chat_message("user"):
+         st.markdown(prompt)
+
+     st.session_state.messages.append({"role": "user", "content": prompt})
+
+     search_results = vectors.similarity_search(question, k=3)
+
+     logger.info('Contexto: ')
+     for i, search_result in enumerate(search_results):
+         logger.info(f"{i + 1}) {search_result.page_content}")
+
+     context = " ".join([search_result.page_content for search_result in search_results])
+     final_prompt = prompt.format(context, question)
+     logger.info(f'\n{final_prompt}\n')
+
+     answer = llm_model(final_prompt)
+     text_answer = answer[0]['generated_text']
+
+     logger.info("AI response: ", text_answer)
+
+     with st.chat_message("assistant"):
+         st.markdown(text_answer)
+
+     st.session_state.messages.append({"role": "assistant", "content": text_answer})
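
For reference, the retrieve-and-generate step preserved in app_bkp.py can be read as a single function. The following is only a sketch, not part of the commit; `vector_store`, `generator`, and `template` stand in for the `vectors`, `llm_model`, and `prompt` objects built in app_bkp.py.

def answer_question(question, vector_store, generator, template, k=3):
    # Retrieve the k chunks most similar to the question from the FAISS index.
    docs = vector_store.similarity_search(question, k=k)
    context = " ".join(doc.page_content for doc in docs)
    # The template has two positional placeholders: the context first, then the question.
    final_prompt = template.format(context, question)
    # The pipeline was created with return_full_text=False, so only the completion is returned.
    return generator(final_prompt)[0]["generated_text"]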