Spaces:

AI-RESEARCHER-2024
/

CHAINLIT-RAG

Runtime error

App Files Community

AI-RESEARCHER-2024 committed on Oct 30, 2024

Commit

d7b6100

verified ·

1 Parent(s): 992aa46

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -67

app.py CHANGED Viewed

@@ -1,53 +1,11 @@
 import os
-from typing import Any, List, Mapping, Optional, Dict
 import chainlit as cl
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langchain_community.vectorstores import Chroma
-from langchain.callbacks.manager import CallbackManagerForLLMRun
-from langchain.llms.base import LLM
 from llama_cpp import Llama
-from pydantic import Field, BaseModel
-class LlamaCppLLM(LLM, BaseModel):
-    """Custom LangChain wrapper for llama.cpp"""
-    client: Any = Field(default=None, exclude=True)
-    model_path: str = Field(..., description="Path to the model file")
-    n_ctx: int = Field(default=2048, description="Context window size")
-    n_threads: int = Field(default=4, description="Number of CPU threads")
-    n_gpu_layers: int = Field(default=0, description="Number of GPU layers")
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.client = Llama(
-            model_path=self.model_path,
-            n_ctx=self.n_ctx,
-            n_threads=self.n_threads,
-            n_gpu_layers=self.n_gpu_layers
-        )
-    @property
-    def _llm_type(self) -> str:
-        return "llama.cpp"
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        if not self.client:
-            raise RuntimeError("Model not initialized")
-        response = self.client.create_chat_completion(
-            messages=[{"role": "user", "content": prompt}],
-            **kwargs
-        )
-        return response["choices"][0]["message"]["content"]
 # Initialize the embedding model
 embeddings = HuggingFaceEmbeddings(
@@ -58,15 +16,15 @@ embeddings = HuggingFaceEmbeddings(
 # Load the existing Chroma vector store
 persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
-vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
-# Initialize the LLM
-model_path = os.path.join(os.path.dirname(__file__), "models", "llama-model.gguf")
-llm = LlamaCppLLM(
-    model_path=model_path,
-    n_ctx=2048,
-    n_threads=4,
-    n_gpu_layers=0
 )
 # Create the RAG prompt template
@@ -84,39 +42,39 @@ prompt = ChatPromptTemplate.from_template(template)
 @cl.on_chat_start
 async def start():
-    # Send initial message
     await cl.Message(
         content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
     ).send()
 @cl.on_message
 async def main(message: cl.Message):
-    # Create a loading message
     msg = cl.Message(content="")
     await msg.send()
-    # Start typing effect
     async with cl.Step(name="Searching documents..."):
         try:
-            # Search the vector store
             retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
-            # Create the RAG chain
-            rag_chain = (
-                {"context": retriever, "question": RunnablePassthrough()}
-                | prompt
-                | llm
-                | StrOutputParser()
             )
-            # Execute the chain
-            response = await cl.make_async(rag_chain)(message.content)
             # Update loading message with response
-            await msg.update(content=response)
             # Show source documents
-            docs = retriever.get_relevant_documents(message.content)
             elements = []
             for i, doc in enumerate(docs):
                 source_name = f"Source {i+1}"
@@ -133,4 +91,4 @@ async def main(message: cl.Message):
             await msg.update(content=error_msg)
 if __name__ == '__main__':
-    cl.start()

 import os
 import chainlit as cl
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langchain_community.vectorstores import Chroma
 from llama_cpp import Llama
 # Initialize the embedding model
 embeddings = HuggingFaceEmbeddings(
 # Load the existing Chroma vector store
 persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
+vectorstore = Chroma(
+    persist_directory=persist_directory,
+    embedding_function=embeddings
+)
+# Initialize the Llama model using from_pretrained
+llm = Llama.from_pretrained(
+    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
 )
 # Create the RAG prompt template
 @cl.on_chat_start
 async def start():
     await cl.Message(
         content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
     ).send()
 @cl.on_message
 async def main(message: cl.Message):
     msg = cl.Message(content="")
     await msg.send()
     async with cl.Step(name="Searching documents..."):
         try:
             retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+            docs = retriever.get_relevant_documents(message.content)
+            context = "\n\n".join([doc.page_content for doc in docs])
+            # Format the prompt
+            final_prompt = prompt.format(context=context, question=message.content)
+            # Generate response using the Llama model
+            response = llm.create_chat_completion(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": final_prompt
+                    }
+                ]
             )
+            assistant_reply = response['choices'][0]['message']['content']
             # Update loading message with response
+            await msg.update(content=assistant_reply)
             # Show source documents
             elements = []
             for i, doc in enumerate(docs):
                 source_name = f"Source {i+1}"
             await msg.update(content=error_msg)
 if __name__ == '__main__':
+    cl.run()