Spaces:

Sambhavnoobcoder
/

Lecture-Notes-Chatbot

Running

App Files Files Community

Sambhavnoobcoder committed on Jun 25, 2024

Commit

d2dd8e3

verified ·

1 Parent(s): 3346531

final version of app.py

Browse files

Made substantial changes to the code in app.py. The current version has the finalised UI and all base requirements satisfied.

Files changed (1) hide show

app.py +40 -56

app.py CHANGED Viewed

@@ -6,26 +6,11 @@ from sentence_transformers import SentenceTransformer
 from bs4 import BeautifulSoup
 import gradio as gr
- # Configure Gemini API key
-gemini_api_secret_name = 'AIzaSyA0yLvySmj8xjMd0sedSgklg1fj0wBDyyw'
- from google.colab import userdata
- try:
-    GOOGLE_API_KEY = userdata.get(gemini_api_secret_name)
-    genai.configure(api_key=GOOGLE_API_KEY)
-except userdata.SecretNotFoundError as e:
-    print(f'Secret not found\n\nThis expects you to create a secret named {gemini_api_secret_name} in Colab\n\nVisit https://makersuite.google.com/app/apikey to create an API key\n\nStore that in the secrets section on the left side of the notebook (key icon)\n\nName the secret {gemini_api_secret_name}')
-    raise e
-except userdata.NotebookAccessError as e:
-    print(f'You need to grant this notebook access to the {gemini_api_secret_name} secret in order for the notebook to access Gemini on your behalf.')
-    raise e
-except Exception as e:
-    # unknown error
-    print(f"There was an unknown error. Ensure you have a secret {gemini_api_secret_name} stored in Colab and it's a valid key from https://makersuite.google.com/app/apikey")
-    raise e
- # Fetch lecture notes and model architectures
 def fetch_lecture_notes():
     lecture_urls = [
         "https://stanford-cs324.github.io/winter2022/lectures/introduction/",
@@ -43,7 +28,7 @@ def fetch_lecture_notes():
             print(f"Failed to fetch content from {url}, status code: {response.status_code}")
     return lecture_texts
- def fetch_model_architectures():
     url = "https://github.com/Hannibal046/Awesome-LLM#milestone-papers"
     response = requests.get(url)
     if response.status_code == 200:
@@ -53,7 +38,7 @@ def fetch_lecture_notes():
         print(f"Failed to fetch model architectures, status code: {response.status_code}")
         return "", url
- # Extract text from HTML content
 def extract_text_from_html(html_content):
     soup = BeautifulSoup(html_content, 'html.parser')
     for script in soup(["script", "style"]):
@@ -61,110 +46,109 @@ def extract_text_from_html(html_content):
     text = soup.get_text(separator="\n", strip=True)
     return text
- # Generate embeddings using SentenceTransformers
 def create_embeddings(texts, model):
     texts_only = [text for text, _ in texts]
     embeddings = model.encode(texts_only)
     return embeddings
- # Initialize FAISS index
 def initialize_faiss_index(embeddings):
     dimension = embeddings.shape[1]  # Assuming all embeddings have the same dimension
     index = faiss.IndexFlatL2(dimension)
     index.add(embeddings.astype('float32'))
     return index
- # Handle natural language queries
 conversation_history = []
- def handle_query(query, faiss_index, embeddings_texts, model):
     global conversation_history
-     query_embedding = model.encode([query]).astype('float32')
-     # Search FAISS index
     _, indices = faiss_index.search(query_embedding, 3)  # Retrieve top 3 results
     relevant_texts = [embeddings_texts[idx] for idx in indices[0]]
-     # Combine relevant texts and truncate if necessary
     combined_text = "\n".join([text for text, _ in relevant_texts])
     max_length = 500  # Adjust as necessary
     if len(combined_text) > max_length:
         combined_text = combined_text[:max_length] + "..."
-     # Generate a response using Gemini
     try:
         response = genai.generate_text(
             model="models/text-bison-001",
             prompt=f"Based on the following context:\n\n{combined_text}\n\nAnswer the following question: {query}",
             max_output_tokens=200
         )
-        generated_text = response.result
     except Exception as e:
         print(f"Error generating text: {e}")
         generated_text = "An error occurred while generating the response."
-     # Update conversation history
-    conversation_history.append(f"User: {query}")
-    conversation_history.append(f"System: {generated_text}")
-     # Extract sources
     sources = [url for _, url in relevant_texts]
-     return generated_text, sources
- def generate_concise_response(prompt, context):
     try:
         response = genai.generate_text(
             model="models/text-bison-001",
             prompt=f"{prompt}\n\nContext: {context}\n\nAnswer:",
             max_output_tokens=200
         )
-        return response.result
     except Exception as e:
         print(f"Error generating concise response: {e}")
         return "An error occurred while generating the concise response."
- # Main function to execute the pipeline
-def chatbot(message , history):
     lecture_notes = fetch_lecture_notes()
     model_architectures = fetch_model_architectures()
-     all_texts = lecture_notes + [model_architectures]
-     # Load the SentenceTransformers model
     embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
-     embeddings = create_embeddings(all_texts, embedding_model)
-     # Initialize FAISS index
     faiss_index = initialize_faiss_index(np.array(embeddings))
-     response, sources = handle_query(message, faiss_index, all_texts, embedding_model)
     print("Query:", message)
     print("Response:", response)
     total_text = response
     if sources:
         print("Sources:", sources)
-        relevant_source = ""
-        for source in sources:
-          relevant_source += source +"\n"
-        total_text += "\n\nSources:\n" + relevant_source
     else:
         print("Sources: None of the provided sources were used.")
     print("----")
-     # Generate a concise and relevant summary using Gemini
     prompt = "Summarize the user queries so far"
-    user_queries_summary = " ".join(message)
     concise_response = generate_concise_response(prompt, user_queries_summary)
     print("Concise Response:")
     print(concise_response)
     return total_text
- iface = gr.ChatInterface(
     chatbot,
     title="LLM Research Assistant",
     description="Ask questions about LLM architectures, datasets, and training techniques.",
@@ -180,5 +164,5 @@ def chatbot(message , history):
     clear_btn="Clear",
 )
- if __name__ == "__main__":
-    iface.launch(debug=True)

 from bs4 import BeautifulSoup
 import gradio as gr
# Configure the Gemini API key.
# SECURITY: an API key must never be committed to the repository; any key
# that has appeared in source control is public and should be revoked.
# Supply the key through the GOOGLE_API_KEY environment variable instead
# (on Hugging Face Spaces: Settings -> "Variables and secrets").
import os

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Fail at start-up with an actionable message rather than on the first
    # user query with an opaque authentication error.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Create a key at "
        "https://makersuite.google.com/app/apikey and expose it to the app "
        "as the GOOGLE_API_KEY environment variable."
    )
genai.configure(api_key=GOOGLE_API_KEY)
+# Fetch lecture notes and model architectures
 def fetch_lecture_notes():
     lecture_urls = [
         "https://stanford-cs324.github.io/winter2022/lectures/introduction/",
             print(f"Failed to fetch content from {url}, status code: {response.status_code}")
     return lecture_texts
+def fetch_model_architectures():
     url = "https://github.com/Hannibal046/Awesome-LLM#milestone-papers"
     response = requests.get(url)
     if response.status_code == 200:
         print(f"Failed to fetch model architectures, status code: {response.status_code}")
         return "", url
+# Extract text from HTML content
 def extract_text_from_html(html_content):
     soup = BeautifulSoup(html_content, 'html.parser')
     for script in soup(["script", "style"]):
     text = soup.get_text(separator="\n", strip=True)
     return text
+# Generate embeddings using SentenceTransformers
 def create_embeddings(texts, model):
     """Embed the text component of every pair in *texts*.

     Args:
         texts: Iterable of two-item pairs. Only the first item (the text)
             is embedded; the second item is ignored here.
         model: Object exposing ``encode(list_of_str)``.

     Returns:
         Whatever ``model.encode`` returns for the extracted texts, in the
         same order as *texts*.
     """
     passages = []
     for passage, _ in texts:
         passages.append(passage)
     return model.encode(passages)
+# Initialize FAISS index
 def initialize_faiss_index(embeddings):
     """Build a flat L2 FAISS index holding every row of *embeddings*.

     Args:
         embeddings: 2-D array-like with ``shape`` and ``astype``; the second
             axis is taken as the vector dimension.

     Returns:
         A ``faiss.IndexFlatL2`` to which all rows have been added.
     """
     # Every row is assumed to share the width of the second axis.
     index = faiss.IndexFlatL2(embeddings.shape[1])
     vectors = embeddings.astype('float32')
     index.add(vectors)
     return index
# Handle natural language queries
# Module-level log of (query, generated_text) pairs, appended to by handle_query.
conversation_history = []


def handle_query(query, faiss_index, embeddings_texts, model, *, top_k=3, max_length=500):
    """Answer *query* from the passages nearest to it in *faiss_index*.

    Args:
        query: The user's question.
        faiss_index: Index exposing ``search(vectors, k)``; row ``i`` of the
            index is expected to correspond to ``embeddings_texts[i]``.
        embeddings_texts: Sequence of ``(text, url)`` pairs.
        model: Object whose ``encode(list_of_str)`` returns an array that
            supports ``astype``.
        top_k: Maximum number of passages to retrieve.
        max_length: Maximum number of context characters placed in the prompt.

    Returns:
        A ``(generated_text, sources)`` tuple: the model's answer (or a
        placeholder string when generation fails or yields nothing) and the
        URLs of the retrieved passages.
    """
    # Never ask the index for more neighbours than there are documents, and
    # skip the search entirely for an empty corpus (k must be positive).
    k = min(top_k, len(embeddings_texts))
    relevant_texts = []
    if k > 0:
        query_embedding = model.encode([query]).astype('float32')
        # Search FAISS index
        _, indices = faiss_index.search(query_embedding, k)
        # FAISS pads missing neighbours with -1; as a Python index that would
        # silently select the *last* document, so drop negative ids.
        relevant_texts = [embeddings_texts[idx] for idx in indices[0] if idx >= 0]

    # Combine relevant texts and truncate if necessary
    combined_text = "\n".join(text for text, _ in relevant_texts)
    if len(combined_text) > max_length:
        combined_text = combined_text[:max_length] + "..."

    # Generate a response using Gemini
    try:
        response = genai.generate_text(
            model="models/text-bison-001",
            prompt=f"Based on the following context:\n\n{combined_text}\n\nAnswer the following question: {query}",
            max_output_tokens=200
        )
        # ``result`` may be None when the API returns no candidate; always
        # hand a string back so callers can concatenate onto it.
        generated_text = (response.result if response else None) or "No response generated."
    except Exception as e:
        # Boundary with the remote API: report and degrade to a message.
        print(f"Error generating text: {e}")
        generated_text = "An error occurred while generating the response."

    # Update conversation history
    conversation_history.append((query, generated_text))

    # Extract sources
    sources = [url for _, url in relevant_texts]
    return generated_text, sources
def generate_concise_response(prompt, context):
    """Ask the text model to answer *prompt* given *context*.

    Args:
        prompt: Instruction placed at the start of the model prompt.
        context: Text inserted after ``Context:`` in the model prompt.

    Returns:
        The generated text, ``"No response generated."`` when the API yields
        no result, or an error message string when the call raises.
    """
    try:
        response = genai.generate_text(
            model="models/text-bison-001",
            prompt=f"{prompt}\n\nContext: {context}\n\nAnswer:",
            max_output_tokens=200
        )
        # ``result`` may be None even when a response object is returned;
        # fall back to the placeholder so this function always returns a str.
        return (response.result if response else None) or "No response generated."
    except Exception as e:
        # Boundary with the remote API: report and degrade to a message.
        print(f"Error generating concise response: {e}")
        return "An error occurred while generating the concise response."
# Main function to execute the pipeline
# Cache for the corpus, embedding model and FAISS index. Building them means
# downloading every page and loading the SentenceTransformers model, so it is
# done once per process instead of once per chat message.
_retrieval_state = None


def _get_retrieval_state():
    """Return ``(all_texts, embedding_model, faiss_index)``, building it on first use."""
    global _retrieval_state
    if _retrieval_state is None:
        lecture_notes = fetch_lecture_notes()
        model_architectures = fetch_model_architectures()
        all_texts = lecture_notes + [model_architectures]

        # Load the SentenceTransformers model
        embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        embeddings = create_embeddings(all_texts, embedding_model)

        # Initialize FAISS index
        faiss_index = initialize_faiss_index(np.array(embeddings))
        # NOTE: pages that failed to download at build time stay missing
        # until the process restarts.
        _retrieval_state = (all_texts, embedding_model, faiss_index)
    return _retrieval_state


def chatbot(message, history):
    """Gradio chat callback: answer *message* and list the sources used.

    Args:
        message: The user's latest message.
        history: Previous turns supplied by the chat UI; the first item of
            each turn is read as the user's message.

    Returns:
        The generated answer, followed by a ``Sources:`` section when any
        source URLs were retrieved.
    """
    all_texts, embedding_model, faiss_index = _get_retrieval_state()

    response, sources = handle_query(message, faiss_index, all_texts, embedding_model)
    print("Query:", message)
    print("Response:", response)
    total_text = response
    if sources:
        print("Sources:", sources)
        relevant_source = "\n".join(sources)
        total_text += f"\n\nSources:\n{relevant_source}"
    else:
        print("Sources: None of the provided sources were used.")
    print("----")

    # Generate a concise and relevant summary using Gemini.
    # The summary is only printed to the log; it is not part of the reply.
    prompt = "Summarize the user queries so far"
    user_queries_summary = " ".join([msg[0] for msg in history] + [message])
    concise_response = generate_concise_response(prompt, user_queries_summary)
    print("Concise Response:")
    print(concise_response)
    return total_text
+# Create the Gradio interface
+iface = gr.ChatInterface(
     chatbot,
     title="LLM Research Assistant",
     description="Ask questions about LLM architectures, datasets, and training techniques.",
     clear_btn="Clear",
 )
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)