iasbeck committed
Commit a21dabb · 1 Parent(s): 24542f2

Creation of the Rag class and update of app.py.
Files changed (6)
  1. app.py +5 -1
  2. app_bkp.py +0 -136
  3. app_echo.py +24 -0
  4. rag.py +158 -0
  5. rag_test.py +82 -42
  6. rag_test_bkp.py +119 -0
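
In short, this commit swaps the hard-coded echo reply in app.py for a call into the new Rag class, so the Streamlit chat now answers from the retrieval pipeline defined in rag.py. A minimal usage sketch of that flow (names taken from the diffs below; per rag.py, a local train.txt, an HF_TOKEN environment variable and a CUDA device for the embeddings are assumed):

from rag import Rag

rag = Rag()  # cheap to construct; the FAISS index and the zephyr-7b-beta pipeline are built lazily
answer = rag.get_answer("What is Cardiogenic shock?")  # retrieves k=3 chunks, then prompts the LLM
print(answer)
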
app.py CHANGED
@@ -1,6 +1,9 @@
import streamlit as st

+ from rag import Rag
+
st.title('Echo Bot')
+ rag = Rag()

if "messages" not in st.session_state:
    st.session_state.messages = []
@@ -16,7 +19,8 @@ if prompt:

    st.session_state.messages.append({'role': 'user', 'content': prompt})

-     response = f'**Echo**: {prompt}'
+     # response = f'**Echo**: {prompt}'
+     response = f'{rag.get_answer(prompt)}'

    with st.chat_message('assistant'):
        st.markdown(response)
app_bkp.py DELETED
@@ -1,136 +0,0 @@
- import streamlit as st
- import multiprocessing
- from langchain.docstore.document import Document as LangChainDocument
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_community.vectorstores import FAISS
- from huggingface_hub import login
- from loguru import logger
- import os
- from dotenv import load_dotenv
-
- multiprocessing.freeze_support()
- load_dotenv()
- access_token = os.getenv("ACCESS_TOKEN")
- login(token=access_token)
- logger.info('Login realizado com sucesso.')
-
- logger.info('Carregando arquivo no qual será baseado o RAG.')
- with open('train.txt', 'r') as f:
-     data = f.read()
-
- logger.info('Representando o documento utilizando o LangChainDocument.')
- raw_database = LangChainDocument(page_content=data)
-
- MARKDOWN_SEPARATORS = [
-     "\n#{1,6} ",
-     "```\n",
-     "\n\\*\\*\\*+\n",
-     "\n---+\n",
-     "\n___+\n",
-     "\n\n",
-     "\n",
-     " ",
-     "",
- ]
-
- logger.info('Quebrando o documento para a criação dos chunks.')
- splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100)
- process_data = splitter.split_documents([raw_database])
- process_data = process_data[:5]  # TODO: REMOVER DEPOIS
-
- embedding_model_name = "thenlper/gte-small"
- logger.info(f'Definição do modelo de embeddings: {embedding_model_name}.')
- embedding_model = HuggingFaceEmbeddings(
-     model_name=embedding_model_name,
-     multi_process=True,
-     model_kwargs={"device": "cuda"},
-     encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
- )
-
- logger.info('Criação da base de dados vetorial (em memória).')
- vectors = FAISS.from_documents(process_data, embedding_model)
-
- from transformers import pipeline
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
- # model_name = "meta-llama/Llama-3.2-1B"
- model_name = "HuggingFaceH4/zephyr-7b-beta"
- # model_name = "mistralai/Mistral-7B-Instruct-v0.3"
- # model_name = "meta-llama/Llama-3.2-3B-Instruct"
- logger.info(f'Carregamento do modelo de linguagem principal: {model_name}')
-
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch.bfloat16,
- )
- model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
- tokenizer = AutoTokenizer.from_pretrained(model_name)
-
- llm_model = pipeline(
-     model=model,
-     tokenizer=tokenizer,
-     task="text-generation",
-     do_sample=True,
-     temperature=0.4,
-     repetition_penalty=1.1,
-     return_full_text=False,
-     max_new_tokens=500
- )
- logger.info(f'Modelo {model_name} carregado com sucesso.')
-
- prompt = """
- <|system|>
- You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
- Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
- Respond only to the question asked.
-
- <|user|>
- Context:
- {}
- ---
- Here is the question you need to answer.
-
- Question: {}
- ---
- <|assistant|>
- """
-
- st.title("Echo Bot")
-
- if "messages" not in st.session_state:
-     st.session_state.messages = []
-
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
-
- question = st.chat_input("How can I help you?")
- if question:
-     with st.chat_message("user"):
-         st.markdown(prompt)
-
-     st.session_state.messages.append({"role": "user", "content": prompt})
-
-     search_results = vectors.similarity_search(question, k=3)
-
-     logger.info('Contexto: ')
-     for i, search_result in enumerate(search_results):
-         logger.info(f"{i + 1}) {search_result.page_content}")
-
-     context = " ".join([search_result.page_content for search_result in search_results])
-     final_prompt = prompt.format(context, question)
-     logger.info(f'\n{final_prompt}\n')
-
-     answer = llm_model(final_prompt)
-     text_answer = answer[0]['generated_text']
-
-     logger.info("AI response: ", text_answer)
-
-     with st.chat_message("assistant"):
-         st.markdown(text_answer)
-
-     st.session_state.messages.append({"role": "assistant", "content": text_answer})
app_echo.py ADDED
@@ -0,0 +1,24 @@
+ import streamlit as st
+
+ st.title('Echo Bot')
+
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ for message in st.session_state.messages:
+     with st.chat_message(message['role']):
+         st.markdown(message['content'])
+
+ prompt = st.chat_input('How can I help you?')
+ if prompt:
+     with st.chat_message('user'):
+         st.markdown(prompt)
+
+     st.session_state.messages.append({'role': 'user', 'content': prompt})
+
+     response = f'**Echo**: {prompt}'
+
+     with st.chat_message('assistant'):
+         st.markdown(response)
+
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
rag.py ADDED
@@ -0,0 +1,158 @@
+ import multiprocessing
+ import time
+
+ from langchain.docstore.document import Document as LangChainDocument
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from huggingface_hub import login
+ from loguru import logger
+ from transformers import pipeline
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import os
+ from dotenv import load_dotenv
+
+ class Rag:
+     def __init__(self):
+         self.vectors = None
+         self.raw_database = None
+         self.process_data = None
+         self.embedding_model = None
+         self.llm_model = None
+
+         self.data_file_name = 'train.txt'
+         self.embedding_model_name = "thenlper/gte-small"
+         self.model_name = "HuggingFaceH4/zephyr-7b-beta"
+
+         multiprocessing.freeze_support()
+
+     def build_vector_database(self):
+         if self.vectors is None:
+             self.load_document()
+             self.generate_chunks()
+
+             logger.info('Criação da base de dados vetorial (em memória).')
+             self.vectors = FAISS.from_documents(self.process_data, self.embedding_model)
+
+     def load_document(self):
+         logger.info('Carregando arquivo no qual será baseado o RAG.')
+         with open(self.data_file_name, 'r') as f:
+             data = f.read()
+
+         logger.info('Representando o documento utilizando o LangChainDocument.')
+         self.raw_database = LangChainDocument(page_content=data)
+
+     def generate_chunks(self):
+         MARKDOWN_SEPARATORS = [
+             "\n#{1,6} ",
+             "```\n",
+             "\n\\*\\*\\*+\n",
+             "\n---+\n",
+             "\n___+\n",
+             "\n\n",
+             "\n",
+             " ",
+             "",
+         ]
+
+         logger.info('Quebrando o documento para a criação dos chunks.')
+         splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000,
+                                                   chunk_overlap=100)
+         self.process_data = splitter.split_documents([self.raw_database])
+         self.process_data = self.process_data[:5]  # TODO: REMOVER DEPOIS
+
+
+         logger.info(f'Definição do modelo de embeddings: {self.embedding_model_name}.')
+         self.embedding_model = HuggingFaceEmbeddings(
+             model_name=self.embedding_model_name,
+             multi_process=True,
+             model_kwargs={"device": "cuda"},  # TODO: AJUSTAR DEPOIS
+             encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
+         )
+
+     def load_model(self):
+         if self.llm_model is None:
+             load_dotenv()
+             login(token=os.getenv('HF_TOKEN'))
+             time.sleep(2)
+
+             logger.info(f'Carregamento do modelo de linguagem principal: {self.model_name}')
+
+             bnb_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_use_double_quant=True,
+                 bnb_4bit_quant_type="nf4",
+                 bnb_4bit_compute_dtype=torch.bfloat16,
+             )
+             model = AutoModelForCausalLM.from_pretrained(self.model_name, quantization_config=bnb_config)
+             tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+
+             self.llm_model = pipeline(
+                 model=model,
+                 tokenizer=tokenizer,
+                 task="text-generation",
+                 do_sample=True,
+                 temperature=0.4,
+                 repetition_penalty=1.1,
+                 return_full_text=False,
+                 max_new_tokens=500
+             )
+             logger.info(f'Modelo {self.model_name} carregado com sucesso.')
+
+     def get_answer(self, question, use_context=True):
+         self.build_vector_database()
+         self.load_model()
+
+         if use_context:
+             prompt = """
+             <|system|>
+             You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+             Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+             Respond only to the question asked.
+
+             <|user|>
+             Context:
+             {}
+             ---
+             Here is the question you need to answer.
+
+             Question: {}
+             ---
+             <|assistant|>
+             """
+
+             search_results = self.vectors.similarity_search(question, k=3)
+             logger.info('Contexto: ')
+             for i, search_result in enumerate(search_results):
+                 logger.info(f"{i + 1}) {search_result.page_content}")
+
+             context = " ".join([search_result.page_content for search_result in search_results])
+
+             final_prompt = prompt.format(context, question)
+             logger.info(f'Prompt final: \n{final_prompt}\n')
+             answer = self.llm_model(final_prompt)
+             logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+         else:
+             prompt = """
+             <|system|>
+             You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+             Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+             Respond only to the question asked.
+
+             <|user|>
+             ---
+             Here is the question you need to answer.
+
+             Question: {}
+             ---
+             <|assistant|>
+             """
+
+             final_prompt = prompt.format(question)
+             logger.info(f'Prompt final: \n{final_prompt}\n')
+             answer = self.llm_model(final_prompt)
+             logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+         return answer[0]['generated_text']
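
A note on the design above: build_vector_database() and load_model() are guarded by "if self.vectors is None" and "if self.llm_model is None", so the gte-small embeddings, the in-memory FAISS index and the 4-bit quantized zephyr-7b-beta pipeline are only created on the first get_answer() call and reused afterwards. get_answer() also accepts use_context=False, which skips the similarity search and prompts the model with the question alone, e.g. (sketch, reusing a Rag instance as in the snippet near the top of this page):

answer = rag.get_answer("What is Cardiogenic shock?", use_context=False)  # no retrieval, question only
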
rag_test.py CHANGED
@@ -1,4 +1,6 @@
import multiprocessing
+ import time
+
from langchain.docstore.document import Document as LangChainDocument
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
@@ -11,16 +13,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
from dotenv import load_dotenv

+ vector_database_builded = False

- def main():
-     load_dotenv()
+
+ def load_document():
    logger.info('Carregando arquivo no qual será baseado o RAG.')
    with open('train.txt', 'r') as f:
        data = f.read()

    logger.info('Representando o documento utilizando o LangChainDocument.')
    raw_database = LangChainDocument(page_content=data)
+     return raw_database

+
+ def generate_chunks(raw_database):
    MARKDOWN_SEPARATORS = [
        "\n#{1,6} ",
        "```\n",
@@ -43,12 +49,26 @@ def main():
    embedding_model = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        multi_process=True,
-         model_kwargs={"device": "cuda"},
+         model_kwargs={"device": "cuda"},  # TODO: AJUSTAR DEPOIS
        encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
    )

+     return process_data, embedding_model
+
+
+ def build_vector_database():
+     raw_database = load_document()
+     process_data, embedding_model = generate_chunks(raw_database)
+
    logger.info('Criação da base de dados vetorial (em memória).')
    vectors = FAISS.from_documents(process_data, embedding_model)
+     return vectors
+
+
+ def load_model():
+     load_dotenv()
+     login(token=os.getenv('HF_TOKEN'))
+     time.sleep(2)

    # model_name = "meta-llama/Llama-3.2-1B"
    model_name = "HuggingFaceH4/zephyr-7b-beta"
@@ -77,42 +97,62 @@ def main():
    )
    logger.info(f'Modelo {model_name} carregado com sucesso.')

-     prompt = """
-     <|system|>
-     You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
-     Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
-     Respond only to the question asked.
-
-     <|user|>
-     Context:
-     {}
-     ---
-     Here is the question you need to answer.
-
-     Question: {}
-     ---
-     <|assistant|>
-     """
-
-     question = "What is Cardiogenic shock?"
-     search_results = vectors.similarity_search(question, k=3)
-
-     logger.info('Contexto: ')
-     for i, search_result in enumerate(search_results):
-         logger.info(f"{i + 1}) {search_result.page_content}")
-
-     context = " ".join([search_result.page_content for search_result in search_results])
-     final_prompt = prompt.format(context, question)
-     logger.info(f'\n{final_prompt}\n')
-
-     answer = llm_model(final_prompt)
-
-     logger.info("AI response: ", answer[0]['generated_text'])
-
-
- if __name__ == '__main__':
-     multiprocessing.freeze_support()
-     access_token = os.getenv("ACCESS_TOKEN")
-     login(token=access_token)
-     logger.info('Login realizado com sucesso.')
-     main()
+     return llm_model
+
+
+ def get_answer(question, use_context=True):
+     vectors = build_vector_database()
+     llm_model = load_model()
+
+     if use_context:
+         prompt = """
+         <|system|>
+         You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+         Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+         Respond only to the question asked.
+
+         <|user|>
+         Context:
+         {}
+         ---
+         Here is the question you need to answer.
+
+         Question: {}
+         ---
+         <|assistant|>
+         """
+
+         search_results = vectors.similarity_search(question, k=3)
+         logger.info('Contexto: ')
+         for i, search_result in enumerate(search_results):
+             logger.info(f"{i + 1}) {search_result.page_content}")
+
+         context = " ".join([search_result.page_content for search_result in search_results])
+
+         final_prompt = prompt.format(context, question)
+         logger.info(f'Prompt final: \n{final_prompt}\n')
+         answer = llm_model(final_prompt)
+         logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+     else:
+         prompt = """
+         <|system|>
+         You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+         Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+         Respond only to the question asked.
+
+         <|user|>
+         ---
+         Here is the question you need to answer.
+
+         Question: {}
+         ---
+         <|assistant|>
+         """
+
+         final_prompt = prompt.format(question)
+         logger.info(f'Prompt final: \n{final_prompt}\n')
+         answer = llm_model(final_prompt)
+         logger.info(f"Resposta da IA: {answer[0]['generated_text']}")
+
+     return answer[0]['generated_text']
rag_test_bkp.py ADDED
@@ -0,0 +1,119 @@
+ import multiprocessing
+ from langchain.docstore.document import Document as LangChainDocument
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from huggingface_hub import login
+ from loguru import logger
+ from transformers import pipeline
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import os
+ from dotenv import load_dotenv
+
+
+ def main():
+     load_dotenv()
+     logger.info('Carregando arquivo no qual será baseado o RAG.')
+     with open('train.txt', 'r') as f:
+         data = f.read()
+
+     logger.info('Representando o documento utilizando o LangChainDocument.')
+     raw_database = LangChainDocument(page_content=data)
+
+     MARKDOWN_SEPARATORS = [
+         "\n#{1,6} ",
+         "```\n",
+         "\n\\*\\*\\*+\n",
+         "\n---+\n",
+         "\n___+\n",
+         "\n\n",
+         "\n",
+         " ",
+         "",
+     ]
+
+     logger.info('Quebrando o documento para a criação dos chunks.')
+     splitter = RecursiveCharacterTextSplitter(separators=MARKDOWN_SEPARATORS, chunk_size=1000, chunk_overlap=100)
+     process_data = splitter.split_documents([raw_database])
+     process_data = process_data[:5]  # TODO: REMOVER DEPOIS
+
+     embedding_model_name = "thenlper/gte-small"
+     logger.info(f'Definição do modelo de embeddings: {embedding_model_name}.')
+     embedding_model = HuggingFaceEmbeddings(
+         model_name=embedding_model_name,
+         multi_process=True,
+         model_kwargs={"device": "cpu"},  # TODO: AJUSTAR DEPOIS
+         encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
+     )
+
+     logger.info('Criação da base de dados vetorial (em memória).')
+     vectors = FAISS.from_documents(process_data, embedding_model)
+
+     # model_name = "meta-llama/Llama-3.2-1B"
+     model_name = "HuggingFaceH4/zephyr-7b-beta"
+     # model_name = "mistralai/Mistral-7B-Instruct-v0.3"
+     # model_name = "meta-llama/Llama-3.2-3B-Instruct"
+     logger.info(f'Carregamento do modelo de linguagem principal: {model_name}')
+
+     # bnb_config = BitsAndBytesConfig(
+     #     load_in_4bit=True,
+     #     bnb_4bit_use_double_quant=True,
+     #     bnb_4bit_quant_type="nf4",
+     #     bnb_4bit_compute_dtype=torch.bfloat16,
+     # )
+     # model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
+     model = AutoModelForCausalLM.from_pretrained(model_name)
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+     llm_model = pipeline(
+         model=model,
+         tokenizer=tokenizer,
+         task="text-generation",
+         do_sample=True,
+         temperature=0.4,
+         repetition_penalty=1.1,
+         return_full_text=False,
+         max_new_tokens=500
+     )
+     logger.info(f'Modelo {model_name} carregado com sucesso.')
+
+     prompt = """
+     <|system|>
+     You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
+     Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
+     Respond only to the question asked.
+
+     <|user|>
+     Context:
+     {}
+     ---
+     Here is the question you need to answer.
+
+     Question: {}
+     ---
+     <|assistant|>
+     """
+
+     question = "What is Cardiogenic shock?"
+     search_results = vectors.similarity_search(question, k=3)
+
+     logger.info('Contexto: ')
+     for i, search_result in enumerate(search_results):
+         logger.info(f"{i + 1}) {search_result.page_content}")
+
+     context = " ".join([search_result.page_content for search_result in search_results])
+     final_prompt = prompt.format(context, question)
+     logger.info(f'\n{final_prompt}\n')
+
+     answer = llm_model(final_prompt)
+
+     logger.info("AI response: ", answer[0]['generated_text'])
+
+
+ if __name__ == '__main__':
+     multiprocessing.freeze_support()
+     access_token = os.getenv("ACCESS_TOKEN")
+     login(token=access_token)
+     logger.info('Login realizado com sucesso.')
+     main()