ababio committed
Commit b3dd8dc
1 Parent(s): 5ef5575

Update app.py

Files changed (1)
app.py +44 -69
app.py CHANGED
@@ -1,92 +1,67 @@
 import os
-from getpass import getpass
-import gradio as gr
-import random
 import time
-
-pinecone_api_key = os.getenv("PINECONE_API_KEY") or getpass("Enter your Pinecone API Key: ")
-openai_api_key = os.getenv("OPENAI_API_KEY") or getpass("Enter your OpenAI API Key: ")
-
+import streamlit as st
+from getpass import getpass
+from openai import OpenAI
 from llama_index.node_parser import SemanticSplitterNodeParser
 from llama_index.embeddings import OpenAIEmbedding
 from llama_index.ingestion import IngestionPipeline
-
-# This will be the model we use both for Node parsing and for vectorization
-embed_model = OpenAIEmbedding(api_key=openai_api_key)
-
-# Define the initial pipeline
-pipeline = IngestionPipeline(
-    transformations=[
-        SemanticSplitterNodeParser(
-            buffer_size=1,
-            breakpoint_percentile_threshold=95,
-            embed_model=embed_model,
-        ),
-        embed_model,
-    ],
-)
-
 from pinecone.grpc import PineconeGRPC
-from pinecone import ServerlessSpec
-
 from llama_index.vector_stores import PineconeVectorStore
+from llama_index import VectorStoreIndex
+from llama_index.retrievers import VectorIndexRetriever
+from llama_index.query_engine import RetrieverQueryEngine
 
-# Initialize connection to Pinecone
-pc = PineconeGRPC(api_key=pinecone_api_key)
-index_name = "anualreport"
+# Set OpenAI API key from Streamlit secrets
+openai_api_key = st.secrets["OPENAI_API_KEY"]
+pinecone_api_key = st.secrets["PINECONE_API_KEY"]
 
-# Initialize your index
-pinecone_index = pc.Index(index_name)
+# Initialize OpenAI client
+client = OpenAI(api_key=openai_api_key)
 
-# Initialize VectorStore
+# Initialize Pinecone connection
+pc = PineconeGRPC(api_key=pinecone_api_key)
+index_name = "annualreport"
+pinecone_index = pc.Index(index_name)
 vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
 
-pinecone_index.describe_index_stats()
-
-from llama_index import VectorStoreIndex
-from llama_index.retrievers import VectorIndexRetriever
-
-# Set the OpenAI API key if not already set
-if not os.getenv('OPENAI_API_KEY'):
-    os.environ['OPENAI_API_KEY'] = openai_api_key
-
-# Instantiate VectorStoreIndex object from our vector_store object
+# Initialize vector index and retriever
 vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
-
-# Grab 5 search results
 retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
-
-from llama_index.query_engine import RetrieverQueryEngine
-
-# Pass in your retriever from above, which is configured to return the top 5 results
 query_engine = RetrieverQueryEngine(retriever=retriever)
 
-def query_anual_report(query):
+# Set up LlamaIndex embedding model and pipeline
+embed_model = OpenAIEmbedding(api_key=openai_api_key)
+pipeline = IngestionPipeline(
+    transformations=[
+        SemanticSplitterNodeParser(buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model),
+        embed_model,
+    ],
+)
+
+def query_annual_report(query):
     response = query_engine.query(query)
     return response.response
 
-# Define the chat functions
-def user(user_message, history):
-    return "", history + [[user_message, None]]
+# Streamlit app setup
+st.title("ChatGPT-like Clone with Pinecone Integration")
 
-def bot(history):
-    bot_message = query_anual_report(history[-1][0])
-    history[-1][1] = ""
-    for character in bot_message:
-        history[-1][1] += character
-        time.sleep(0.01)  # Reduced sleep time to make response appear faster
-        yield history
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
 
-# Define Gradio Blocks interface
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox()
-    clear = gr.Button("Clear")
+# Display chat messages from history
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
 
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, chatbot, chatbot
-    )
-    clear.click(lambda: None, None, chatbot, queue=False)
+# Accept user input
+if prompt := st.chat_input("What is up?"):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
 
-if __name__ == "__main__":
-    demo.launch()
+    with st.chat_message("assistant"):
+        response = query_annual_report(prompt)
+        st.markdown(response)
+    st.session_state.messages.append({"role": "assistant", "content": response})
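
Note: the updated app reads both API keys from st.secrets, so running it outside Hugging Face Spaces requires a local .streamlit/secrets.toml (on Spaces, the same key names go in the repository's Secrets settings). A minimal sketch, with placeholder values only:

# .streamlit/secrets.toml -- placeholder values, not real keys
OPENAI_API_KEY = "sk-..."
PINECONE_API_KEY = "your-pinecone-api-key"

With the secrets in place, the app starts locally with streamlit run app.py; on a Streamlit-SDK Space it launches automatically.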