# app.py: Streamlit app for chatting with annual reports (Pinecone + LlamaIndex + DialoGPT)
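# Expected environment (inferred from the code below):
#   PINECONE_API_KEY - key for the Pinecone project holding the "anualreport" index
#   OPENAI_API_KEY   - used for query embeddings and response synthesis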
import streamlit as st
import os
from transformers import pipeline, Conversation
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
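# NOTE: the llama_index import paths above follow the pre-0.10 package layout.
# On llama-index >= 0.10 the same classes live under namespaced modules, e.g.
# (for reference only, not used here):
#   from llama_index.core.node_parser import SemanticSplitterNodeParser
#   from llama_index.embeddings.openai import OpenAIEmbedding
#   from llama_index.vector_stores.pinecone import PineconeVectorStore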
# Function to initialize the Pinecone and LlamaIndex setup. Cached so the
# connection is built once per process rather than on every Streamlit rerun.
@st.cache_resource
def initialize_pipeline():
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not pinecone_api_key or not openai_api_key:
        st.error("PINECONE_API_KEY and OPENAI_API_KEY must be set.")
        st.stop()

    embed_model = OpenAIEmbedding(api_key=openai_api_key)

    # Ingestion pipeline: semantic chunking, then embedding. Defined here for
    # reference; the "anualreport" index is assumed to be populated already.
    # (Named ingestion_pipeline to avoid shadowing transformers.pipeline.)
    ingestion_pipeline = IngestionPipeline(
        transformations=[
            SemanticSplitterNodeParser(
                buffer_size=1,
                breakpoint_percentile_threshold=95,
                embed_model=embed_model,
            ),
            embed_model,
        ],
    )

    # Connect to the existing Pinecone index over gRPC
    pc = PineconeGRPC(api_key=pinecone_api_key)
    index_name = "anualreport"
    pinecone_index = pc.Index(index_name)
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

    # Sanity check: raises early if the index is unreachable
    pinecone_index.describe_index_stats()

    # Wrap the vector store in an index and expose a retriever-backed query
    # engine (top 5 most similar chunks). The engine's default LLM reads
    # OPENAI_API_KEY from the environment, which was verified above.
    vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
    query_engine = RetrieverQueryEngine(retriever=retriever)
    return query_engine
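# For reference, a minimal sketch of the offline ingestion step this app
# assumes has already populated the index (the directory path is hypothetical):
#
#   from llama_index import SimpleDirectoryReader
#   documents = SimpleDirectoryReader("reports/").load_data()
#   pipeline = IngestionPipeline(transformations=[...], vector_store=vector_store)
#   pipeline.run(documents=documents)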
# Streamlit UI
st.title("Chat with Annual Reports")
# Initialize the query engine
query_engine = initialize_pipeline()
# Conversation model using Hugging Face transformers. Note: the
# "conversational" task requires transformers < 4.42; it was removed
# in later releases.
conversation_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")
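# Loading DialoGPT at module level means it reloads on every rerun; one
# option (sketch) is to cache it with Streamlit's resource cache:
#
#   @st.cache_resource
#   def load_chat_model():
#       return pipeline("conversational", model="microsoft/DialoGPT-medium")
#   conversation_pipeline = load_chat_model()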
# User input
user_input = st.text_input("You: ", "")
if user_input:
    # Query the vector DB for an answer grounded in the indexed reports
    llm_query = query_engine.query(user_input)
    response = llm_query.response

    # Generate a reply with the Hugging Face conversation model: the
    # conversational pipeline takes a Conversation object, and its replies
    # are read from Conversation.generated_responses
    conversation = conversation_pipeline(Conversation(f"{user_input}\n{response}"))
    bot_response = conversation.generated_responses[-1]

    # Display response
    st.text_area("Bot: ", bot_response, height=200)
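# To carry context across turns, the Conversation object could be kept in
# st.session_state instead of being rebuilt per query (sketch, not part of
# the original app):
#
#   if "conversation" not in st.session_state:
#       st.session_state.conversation = Conversation()
#   st.session_state.conversation.add_user_input(f"{user_input}\n{response}")
#   conversation = conversation_pipeline(st.session_state.conversation)
#   bot_response = conversation.generated_responses[-1]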