File size: 3,912 Bytes
acc9eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00665aa
acc9eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d80aaf
 
acc9eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
from utils import *


# clear the chat history from streamlit session state
def clear_history():
    """Remove the 'history' entry from Streamlit's session state if it exists.

    Used as a widget callback so that changing the ingestion settings
    starts a fresh conversation. Does nothing when no history is stored.
    """
    state = st.session_state
    if 'history' not in state:
        # nothing stored yet, so there is nothing to clear
        return
    del state['history']


if __name__ == "__main__":
    # App entry point: build the sidebar controls, ingest the uploaded document
    # into a vector store, then answer questions and keep a running chat history.
    import os

    # loading the OpenAI api key from .env
    from dotenv import load_dotenv, find_dotenv

    load_dotenv(find_dotenv(), override=True)

    # st.image('img.png')

    st.subheader('Load a Document and Ask a Question')
    with st.sidebar:
        # text_input for the OpenAI API key (alternative to python-dotenv and .env);
        # a key typed here takes precedence over the one loaded from .env
        api_key = st.text_input('OpenAI API Key:', type='password')
        if api_key:
            os.environ['OPENAI_API_KEY'] = api_key

        # file uploader widget
        uploaded_file = st.file_uploader('To upload a file drag and drop it on the area below:', type=['pdf', 'docx', 'txt'])

        # chunk size number widget (changing it resets the chat history)
        chunk_size = st.number_input('Chunk size:', min_value=100, max_value=2048, value=512, on_change=clear_history)

        # k number input widget (changing it resets the chat history)
        k = st.number_input('k', min_value=1, max_value=20, value=3, on_change=clear_history)

        # add data button widget
        add_data = st.button('Add Data', on_click=clear_history)

        if add_data:
            # Check the environment rather than only the sidebar field, so a key
            # supplied through .env is accepted as well as one typed by the user.
            if not os.environ.get('OPENAI_API_KEY'):
                st.error("Please provide your OpenAI API key above.....")
            elif not uploaded_file:  # the user did not browse a file
                st.error("Please drag and drop your file to the upload area above.....")
            else:
                with st.spinner('Reading, chunking and embedding file ...'):
                    # writing the file from RAM to the current directory on disk;
                    # the client-supplied name is reduced to its final component so
                    # it cannot point outside the current directory
                    bytes_data = uploaded_file.read()
                    file_name = os.path.join('./', os.path.basename(uploaded_file.name))
                    with open(file_name, 'wb') as f:
                        f.write(bytes_data)

                    # the helpers below are not defined in this file; presumably
                    # they come from the star import of `utils`
                    data = load_document(file_name)
                    chunks = chunk_data(data, chunk_size=chunk_size)
                    st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')

                    # only the cost is displayed; the token count is not used here
                    _, embedding_cost = calculate_embedding_cost(chunks)
                    st.write(f'Embedding cost: ${embedding_cost:.4f}')

                    # creating the embeddings and returning the vector store
                    vector_store = create_embeddings(chunks)

                    # saving the vector store in the streamlit session state (to be persistent between reruns)
                    st.session_state.vs = vector_store
                    st.success('File uploaded, chunked and embedded successfully.')

    # user's question text input widget
    q = st.text_input('Ask a question about the content of your file:')
    # answer only when a question was entered and a vector store exists
    # (i.e. the user uploaded, split and embedded a file)
    if q and 'vs' in st.session_state:
        vector_store = st.session_state.vs
        st.write(f'k: {k}')
        answer = ask_and_get_answer(vector_store, q, k)

        # text area widget for the LLM answer
        st.text_area('LLM Answer: ', value=answer)

        st.divider()

        # if there's no chat history in the session state, create it
        if 'history' not in st.session_state:
            st.session_state.history = ''

        # the current question and answer
        value = f'Q: {q} \nA: {answer}'

        # newest exchange goes first, separated from older ones by a dashed rule
        st.session_state.history = f'{value} \n {"-" * 100} \n {st.session_state.history}'

        # text area widget for the chat history; its content comes from the
        # session-state entry set just above (same key), so no `value=` is passed --
        # supplying both a default value and a session-state value for one key
        # makes Streamlit emit a warning
        st.text_area(label='Chat History', key='history', height=400)

# run the app: streamlit run ./chat_with_documents.py