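# Chroma needs a newer sqlite3 than some hosted Python runtimes provide; the
# three lines below swap in pysqlite3-binary before anything imports sqlite3
# (a common workaround on Streamlit Cloud and Hugging Face Spaces).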
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import streamlit as st
import pandas as pd
import chromadb
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document

# Set page config
st.set_page_config(page_title="MBAL Chatbot", page_icon="🛡️", layout="wide")

# Read the Hugging Face token from Streamlit secrets (.streamlit/secrets.toml)
HF_TOKEN = st.secrets["HF_TOKEN"]
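
# --- Vector store helpers ---
# Chroma persists its index under ./chroma_db so embeddings survive app restarts.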
def init_chroma():
    persist_directory = "chroma_db"
    chroma_client = chromadb.PersistentClient(path=persist_directory)
    chroma_collection = chroma_client.get_or_create_collection("my_collection")
    return chroma_client, chroma_collection

def init_vectorstore():
    """Reload the existing Chroma collection without re-ingesting documents."""
    persist_directory = "chroma_db"
    # Must match the model used at ingestion time: mixing embedding models with
    # different vector dimensions in one collection breaks similarity search.
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="my_collection")
    return vectorstore
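
# setup_vector() ingests pre-chunked passages from an Excel sheet; each row must
# provide a 'page_content' column plus 'chunk_id', 'document_title', 'topic',
# and 'access' metadata columns.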
@st.cache_resource
def setup_vector():
    # Read the data from the Excel file
    df = pd.read_excel("chunk_metadata_template.xlsx")
    chunks = []
    # Build a list of Documents carrying metadata
    for _, row in df.iterrows():
        chunk_with_metadata = Document(
            page_content=row['page_content'],
            metadata={
                'chunk_id': row['chunk_id'],
                'document_title': row['document_title'],
                'topic': row['topic'],
                'access': row['access']
            }
        )
        chunks.append(chunk_with_metadata)
    # Initialize the embedding model
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    # Initialize, or append to, the existing vectorstore
    persist_directory = "chroma_db"
    collection_name = "my_collection"
    # Build the vectorstore from the documents and write it into Chroma;
    # st.cache_resource keeps this to one ingestion per process instead of
    # re-embedding (and duplicating) the chunks on every Streamlit rerun.
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
        collection_name=collection_name
    )
    # Flush to disk so the data is saved
    vectorstore.persist()
    return vectorstore

# Initialize components
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
chroma_client, chroma_collection = init_chroma()
vectorstore = setup_vector()

# Keep one conversation buffer per browser session; a plain module-level object
# would be recreated on every Streamlit rerun and the chat history lost.
if "memory" not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
memory = st.session_state.memory
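
# rag_query() retrieves the five most similar chunks, prepends the running
# conversation history, and asks Mistral-7B-Instruct to answer as an MBAL consultant.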
def rag_query(query):
    # Retrieve the relevant documents
    retrieved_docs = vectorstore.similarity_search(query, k=5)
    context = "\n".join([doc.page_content for doc in retrieved_docs]) if retrieved_docs else ""
    # Load the past interactions
    past_interactions = memory.load_memory_variables({})[memory.memory_key]
    context_with_memory = f"{context}\n\nConversation History:\n{past_interactions}"
    # Prepare the prompt
    messages = [
        {
            "role": "user",
            "content": f"""You are a consultant advising clients on insurance products from MB Ageas Life in Vietnam. Please respond professionally and accurately, and suggest suitable products by asking a few questions about the customer's needs. All information provided must remain within the scope of MBAL. Invite the customer to register for a more detailed consultation at https://www.mbageas.life/
{context_with_memory}
Question: {query}
Answer:"""
        }
    ]
    response_content = client.chat_completion(messages=messages, max_tokens=1024, stream=False)
    response = response_content.choices[0].message.content.split("Answer:")[-1].strip()
    return response
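
# Feedback handling: a thumbs-up stores the answer in the memory buffer, a
# thumbs-down regenerates the answer and stores the new one instead.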
def process_feedback(query, response, feedback):
    # st.write(f"Feedback received: {'👍' if feedback else '👎'} for query: {query}")
    if feedback:
        # If thumbs up, store the response in the memory buffer
        memory.chat_memory.add_ai_message(response)
    else:
        # If thumbs down, regenerate the response
        # (to also drop the rejected answer from memory, filter on msg.content:
        # memory.chat_memory.messages = [msg for msg in memory.chat_memory.messages if msg.content != response])
        new_query = f"{query}. Please produce an answer that correctly addresses the question"
        new_response = rag_query(new_query)
        st.markdown(new_response)
        memory.chat_memory.add_ai_message(new_response)

# Streamlit interface
st.title("Welcome to the MBAL Chatbot")
st.markdown("***")
st.info('''
I will answer your questions about MB Ageas Life's life insurance products''')
col1, col2 = st.columns(2)
with col1:
    chat = st.button("Chat")
    if chat:
        st.switch_page("pages/chatbot.py")
with col2:
    rag = st.button("Store Document")
    if rag:
        st.switch_page("pages/management.py")
st.markdown("<div style='text-align:center;'></div>", unsafe_allow_html=True)