Spaces:

demoPOC
/

JSearch

Runtime error

App Files Community

demoPOC committed on Sep 14, 2023

Commit

03a32b6

1 Parent(s): f0aca2c

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -45

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import openai
 import os
-openai.api_key=os.getenv("OPENAI_API_KEY")
 from dotenv import load_dotenv
 load_dotenv()
@@ -49,7 +49,7 @@ warnings.filterwarnings("ignore")
 app = Flask(__name__, template_folder="./")
 # Create a directory in a known location to save files to.
-uploads_dir = os.path.join(app.root_path,'static', 'uploads')
 os.makedirs(uploads_dir, exist_ok=True)
@@ -183,6 +183,14 @@ def KBUpload():
 def aiassist():
     return render_template("index.html")
 @app.route('/agent/chat/suggestion', methods=['POST'])
 def process_json():
     print(f"\n{'*' * 100}\n")
@@ -190,53 +198,87 @@ def process_json():
     content_type = request.headers.get('Content-Type')
     if (content_type == 'application/json'):
         requestQuery = request.get_json()
-        print(type(requestQuery))
-        custDetailsPresent=False
-        customerName=""
-        customerDistrict=""
-        if("custDetails" in requestQuery):
-            custDetailsPresent = True
-            customerName=requestQuery['custDetails']['cName']
-            customerDistrict=requestQuery['custDetails']['cDistrict']
-        print("chain initiation")
-        chainRAG=getRAGChain(customerName, customerDistrict, custDetailsPresent,vectordb)
-        print("chain created")
-        suggestionArray = []
-        for index, query in enumerate(requestQuery['message']):
-            #message = answering(query)
-            relevantDoc = vectordb.similarity_search_with_score(query)
-            for doc in relevantDoc:
-                print(f"\n{'-' * 100}\n")
-                print("Document Source>>>>>>  " + doc[len(doc) - 2].metadata['source'] + "\n\n")
-                print("Page Content>>>>>> " + doc[len(doc) - 2].page_content + "\n\n")
-                print("Similarity Score>>>> " + str(doc[len(doc) - 1]))
-                print(f"\n{'-' * 100}\n")
-            message = chainRAG.run({"query": query})
-            print("query:",query)
-            print("Response:", message)
-            if "I don't know" in message:
-                 message = "Dear Sir/ Ma'am, Could you please ask questions relevant to Jio?"
-            responseJSON={"message":message,"id":index}
-            suggestionArray.append(responseJSON)
-        return jsonify(suggestions=suggestionArray)
     else:
         return 'Content-Type not supported!'
 @app.route('/file_upload', methods=['POST'])
-def file_Upload():
-    fileprovided = not request.files.getlist('files[]')[0].filename == ''
-    urlProvided = not request.form.getlist('weburl')[0] == ''
-    print("*******")
-    print("File Provided:" + str(fileprovided))
-    print("URL Provided:" + str(urlProvided))
-    print("*******")
-    print(uploads_dir)
-    documents = loadKB(fileprovided, urlProvided, uploads_dir, request)
-    vectordb=createVectorDB(documents)
-    return render_template("index.html")
 if __name__ == '__main__':
     app.run(host='0.0.0.0',  port=int(os.environ.get('PORT', 7860)))

 import openai
 import os
+# openai.api_key=os.getenv("OPENAI_API_KEY")
 from dotenv import load_dotenv
 load_dotenv()
 app = Flask(__name__, template_folder="./")
 # Create a directory in a known location to save files to.
+uploads_dir = os.path.join(app.root_path,'static', 'searchUploads')
 os.makedirs(uploads_dir, exist_ok=True)
 def aiassist():
     return render_template("index.html")
+@app.route('/aiSearch')
+def html():  # View bound to the /aiSearch route.
+    return render_template("AISearch.html")  # Responds with the rendered AISearch.html template.
+@app.route('/searchKB')
+def KBUpload():  # View bound to the /searchKB route.
+    return render_template("SearchKB.html")  # Responds with the rendered SearchKB.html template.
 @app.route('/agent/chat/suggestion', methods=['POST'])
 def process_json():
     print(f"\n{'*' * 100}\n")
     content_type = request.headers.get('Content-Type')
     if (content_type == 'application/json'):
         requestQuery = request.get_json()
+        print()
+        relevantDoc=vectordb.similarity_search_with_score(requestQuery['query'],distance_metric="cos", k = 3)
+        searchResultArray=[]
+        for doc in relevantDoc:
+            searchResult = {}
+            print(f"\n{'-' * 100}\n")
+            searchResult['documentSource']=doc[len(doc)-2].metadata['source']
+            searchResult['pageContent']=doc[len(doc)-2].page_content
+            searchResult['similarityScore']=str(doc[len(doc)-1])
+            print(doc)
+            print("Document Source>>>>>>  "+searchResult['documentSource']+"\n\n")
+            print("Page Content>>>>>> "+searchResult['pageContent']+"\n\n")
+            print("Similarity Score>>>> "+searchResult['similarityScore'])
+            print(f"\n{'-' * 100}\n")
+            searchResultArray.append(searchResult)
+        print(f"\n{'*' * 100}\n")
+        return jsonify(botMessage=searchResultArray)
     else:
         return 'Content-Type not supported!'
 @app.route('/file_upload', methods=['POST'])
+def file_Upload():
+        fileprovided=not request.files.getlist('files[]')[0].filename==''
+        urlProvided=not request.form.getlist('weburl')[0]==''
+        print("*******")
+        print("File Provided:"+str(fileprovided))
+        print("URL Provided:"+str(urlProvided))
+        print("*******")
+        documents = []
+        if fileprovided:
+            #Delete Files
+            for filename in os.listdir(uploads_dir):
+                file_path = os.path.join(uploads_dir, filename)
+                print("Clearing Doc Directory. Trying to delete"+file_path)
+                try:
+                    if os.path.isfile(file_path) or os.path.islink(file_path):
+                        os.unlink(file_path)
+                    elif os.path.isdir(file_path):
+                        shutil.rmtree(file_path)
+                except Exception as e:
+                    print('Failed to delete %s. Reason: %s' % (file_path, e))
+            #Read and Embed New Files provided
+            for file in request.files.getlist('files[]'):
+                print("File Received>>>"+file.filename)
+                file.save(os.path.join(uploads_dir, secure_filename(file.filename)))
+                #loader = UnstructuredFileLoader(os.path.join(uploads_dir, secure_filename(file.filename)), mode='elements')
+                loader = PyPDFLoader(os.path.join(uploads_dir, secure_filename(file.filename)))
+                documents.extend(loader.load())
+        if urlProvided:
+            weburl=request.form.getlist('weburl')
+            print(weburl)
+            urlList=weburl[0].split(';')
+            print(urlList)
+            print("Selenium Started", datetime.now().strftime("%H:%M:%S"))
+            #urlLoader=RecursiveUrlLoader(urlList[0])
+            urlLoader=SeleniumURLLoader(urlList)
+            print("Selenium Completed", datetime.now().strftime("%H:%M:%S"))
+            documents.extend(urlLoader.load())
+        print(uploads_dir)
+        global chain;
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
+        #text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=150,separator="</Q>")
+        texts = text_splitter.split_documents(documents)
+        print("All chunk List START ***********************\n\n")
+        pretty_print_docs(texts)
+        print("All chunk List END ***********************\n\n")
+        #embeddings = OpenAIEmbeddings()
+        from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+        embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+        global vectordb
+        #vectordb = Chroma.from_documents(texts,embeddings)
+        vectordb=Chroma.from_documents(documents=texts, embedding=embeddings, collection_metadata={"hnsw:space": "cosine"})
+        return render_template("AISearch.html")
 if __name__ == '__main__':  # Start the server only when this file is run as a script.
     app.run(host='0.0.0.0',  port=int(os.environ.get('PORT', 7860)))  # Binds all interfaces; port comes from the PORT env var, defaulting to 7860.