|  | import streamlit as st | 
					
						
						|  | from txtai.pipeline import Textractor | 
					
						
						|  | from txtai.embeddings import Embeddings | 
					
						
						|  | import nltk | 
					
						
						|  | nltk.download('punkt') | 
					
						
						|  |  | 
					
						
						|  | import bs4 as bs | 
					
						
						|  | import urllib.request | 
					
						
						|  | import re | 
					
						
						|  |  | 
					
						
						|  | embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"}) | 
					
						
						|  |  | 
					
						
						|  | url = "https://cdn.pixabay.com/photo/2022/02/25/09/23/background-7033808_1280.jpg" | 
					
						
						|  |  | 
					
						
						|  | st.title("AIP-S³") | 
					
						
						|  | st.write("AI Powered Smart Search System") | 
					
						
						|  | st.image(url) | 
					
						
						|  |  | 
					
						
						|  | st.markdown('_Welecome to Question Answering System 🧠 🤖_') | 
					
						
						|  |  | 
					
						
						|  | a = st.sidebar.radio("SELECT -", ['PDF', 'Website']) | 
					
						
						|  |  | 
					
						
						|  | def my_function_pdf(): | 
					
						
						|  | textract = Textractor(sentences=True) | 
					
						
						|  |  | 
					
						
						|  | data_lines = [] | 
					
						
						|  | for i in (locations_max): | 
					
						
						|  | lines = textract(i) | 
					
						
						|  | data_lines.append(lines) | 
					
						
						|  | total_lines = [] | 
					
						
						|  | for i in data_lines: | 
					
						
						|  | total_lines += i | 
					
						
						|  | seq = embeddings.similarity(quer, total_lines) | 
					
						
						|  | three_most = seq[0:3] | 
					
						
						|  | indexes = [] | 
					
						
						|  | for i in three_most: | 
					
						
						|  | indexes.append(i[0]) | 
					
						
						|  | for j in indexes: | 
					
						
						|  | st.write(total_lines[j]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def my_web(): | 
					
						
						|  | from txtai.pipeline import Textractor | 
					
						
						|  | textract = Textractor(sentences=True) | 
					
						
						|  | data_lines = [] | 
					
						
						|  | total_lines = [] | 
					
						
						|  | article_text = " " | 
					
						
						|  | for i in (locations_max): | 
					
						
						|  |  | 
					
						
						|  | scraped_data = urllib.request.urlopen(i) | 
					
						
						|  | article = scraped_data.read() | 
					
						
						|  | parsed_article = bs.BeautifulSoup(article,'lxml') | 
					
						
						|  | paragraphs = parsed_article.find_all('p') | 
					
						
						|  | for p in paragraphs: | 
					
						
						|  | article_text += p.text | 
					
						
						|  | lines = textract(i) | 
					
						
						|  | data_lines.append(lines) | 
					
						
						|  | total_lines = [] | 
					
						
						|  | for i in data_lines: | 
					
						
						|  | total_lines += i | 
					
						
						|  | seq = embeddings.similarity(quer, total_lines) | 
					
						
						|  | three_most = seq[0:3] | 
					
						
						|  | indexes = [] | 
					
						
						|  | for i in three_most: | 
					
						
						|  | indexes.append(i[0]) | 
					
						
						|  | for j in indexes: | 
					
						
						|  | st.write(total_lines[j]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if a == 'PDF' : | 
					
						
						|  | number = st.number_input('Insert a number of files -',value =1, step =1) | 
					
						
						|  | st.write('Number of PDF files - ', number) | 
					
						
						|  | st.markdown("---") | 
					
						
						|  | locations_max = [] | 
					
						
						|  | for i in range (number) : | 
					
						
						|  | loc = st.text_input('Enter the PDF path :', placeholder = 'ex- /content/drive/MyDrive/', key = i) | 
					
						
						|  | locations_max.append(loc) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?') | 
					
						
						|  | st.write('Your query is - ', quer) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if st.button('Confirm!'): | 
					
						
						|  | st.write('Confirmed') | 
					
						
						|  | my_function_pdf() | 
					
						
						|  | else: | 
					
						
						|  | st.write('') | 
					
						
						|  |  | 
					
						
						|  | else: | 
					
						
						|  | number = st.number_input('Insert a number of Links -',value =1, step =1) | 
					
						
						|  | st.write('Number of web pages - ', number) | 
					
						
						|  | st.markdown("---") | 
					
						
						|  | locations_max = [] | 
					
						
						|  | for i in range (number) : | 
					
						
						|  | loc = st.text_input('Enter the URL :', placeholder = 'ex- https:\\', key = i) | 
					
						
						|  | locations_max.append(loc) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?') | 
					
						
						|  | st.write('Your query is - ', quer) | 
					
						
						|  |  | 
					
						
						|  | if st.button('Confirm!'): | 
					
						
						|  | st.write('Confirmed') | 
					
						
						|  | my_web() | 
					
						
						|  | else: | 
					
						
						|  | st.write('') |