pointing to llama2
main.py
CHANGED
@@ -2,25 +2,27 @@ from fastapi import FastAPI
 from transformers import pipeline
 from txtai.embeddings import Embeddings
 from txtai.pipeline import Extractor
+from llama_cpp import Llama
+
 
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
 app = FastAPI(docs_url="/")
 
 # Create embeddings model with content support
-embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
-embeddings.load('index')
+# embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
+# embeddings.load('index')
 
 # Create extractor instance
-extractor = Extractor(embeddings, "google/flan-t5-base")
+#extractor = Extractor(embeddings, "google/flan-t5-base")
 
-pipe = pipeline(
+pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
 
 
 @app.get("/generate")
 def generate(text: str):
     """
-
+    llama2 q4 backend
     """
     output = pipe(text)
     return {"output": output[0]["generated_text"]}
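A note on this hunk: transformers.pipeline resolves its model argument as a Hub repo id and has no loader for GGML weights, so pointing it at TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin is a likely source of the Space's runtime error; the newly imported llama_cpp.Llama is never actually used. Below is a minimal sketch of the llama.cpp route instead, assuming a llama-cpp-python build that still reads GGML files (pre-GGUF, i.e. before 0.1.79); the n_ctx and max_tokens values are assumptions, not taken from this commit.

# Sketch: serve the same GGML checkpoint through llama-cpp-python instead of
# transformers.pipeline, which cannot read GGML weights.
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI(docs_url="/")

# Fetch the quantized weights from the Hub once at startup.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGML",
    filename="llama-2-7b.ggmlv3.q4_0.bin",
)
llm = Llama(model_path=model_path, n_ctx=2048)  # context size is an assumption

@app.get("/generate")
def generate(text: str):
    """
    llama2 q4 backend
    """
    output = llm(text, max_tokens=256)  # max_tokens is an assumption
    return {"output": output["choices"][0]["text"]}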
@@ -40,9 +42,9 @@ def search(query, question=None):
     return extractor([("answer", query, prompt(question), False)])[0][1]
 
 
-@app.get("/rag")
-def rag(question: str):
-    # question = "what is the document about?"
-    answer = search(question)
-    # print(question, answer)
-    return {answer}
+# @app.get("/rag")
+# def rag(question: str):
+#     # question = "what is the document about?"
+#     answer = search(question)
+#     # print(question, answer)
+#     return {answer}
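With the /rag route commented out alongside the embeddings and extractor setup above, /generate is the only endpoint this revision exposes. A quick client-side check, assuming the app listens on the Spaces default port 7860 (the URL is a placeholder):

# Placeholder URL; replace with the actual Space endpoint.
import requests

resp = requests.get(
    "http://localhost:7860/generate",
    params={"text": "Tell me about llamas."},
)
print(resp.json()["output"])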
|