cb1716pics committed
Commit ced5431 · verified · 1 Parent(s): 345c104

Upload 2 files

Files changed (2)
  1. app.py +171 -63
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,88 +1,196 @@
- import streamlit as st
- from generator import generate_response_from_document
- from retrieval import retrieve_documents_hybrid
- from evaluation import calculate_metrics
- #from data_processing import load_data_from_faiss
- import time
-
- # Page Title
- st.title("RAG7 - Real World RAG System")
-
- # global retrieved_documents
- # retrieved_documents = []
-
- # global response
- # response = ""
-
- # global time_taken_for_response
- # time_taken_for_response = 'N/A'
-
- # @st.cache_data
- # def load_data():
- # load_data_from_faiss()
-
- # data_status = load_data()
-
- # Question Section
- st.subheader("Hi, What do you want to know today?")
- question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)

  # # Submit Button
  # if st.button("Submit"):
  # start_time = time.time()
- # retrieved_documents = retrieve_documents_hybrid(question, 10)
- # response = generate_response_from_document(question, retrieved_documents)
  # end_time = time.time()
- # time_taken_for_response = end_time-start_time
- # else:
- # response = ""

- # # Response Section
  # st.subheader("Response")
- # st.text_area("Generated Response:", value=response, height=150, disabled=True)
-
- # # Metrics Section
- # st.subheader("Metrics")

  # col1, col2 = st.columns([1, 3]) # Creating two columns for button and metrics display

  # with col1:
  # if st.button("Calculate Metrics"):
- # metrics = calculate_metrics(question, response, retrieved_documents, time_taken_for_response)
  # else:
- # metrics = ""

  # with col2:
- # st.text_area("Metrics:", value=metrics, height=100, disabled=True)

- if "retrieved_documents" not in st.session_state:
-     st.session_state.retrieved_documents = []
- if "response" not in st.session_state:
-     st.session_state.response = ""
- if "time_taken_for_response" not in st.session_state:
-     st.session_state.time_taken_for_response = "N/A"

  # Submit Button
- if st.button("Submit"):
      start_time = time.time()
-     st.session_state.retrieved_documents = retrieve_documents_hybrid(question, 10)
-     st.session_state.response = generate_response_from_document(question, st.session_state.retrieved_documents)
      end_time = time.time()
-     st.session_state.time_taken_for_response = end_time - start_time

- # Display stored response
- st.subheader("Response")
- st.text_area("Generated Response:", value=st.session_state.response, height=150, disabled=True)

- col1, col2 = st.columns([1, 3]) # Creating two columns for button and metrics display

- # Calculate Metrics Button
- with col1:
-     if st.button("Calculate Metrics"):
-         metrics = calculate_metrics(question, st.session_state.response, st.session_state.retrieved_documents, st.session_state.time_taken_for_response)
-     else:
-         metrics = {}

- with col2:
-     #st.text_area("Metrics:", value=metrics, height=100, disabled=True)
      st.json(metrics)
+ # import streamlit as st
+ # from generator import generate_response_from_document
+ # from retrieval import retrieve_documents_hybrid
+ # from evaluation import calculate_metrics
+ # #from data_processing import load_data_from_faiss
+ # import time
+
+ # # Page Title
+ # st.title("RAG7 - Real World RAG System")
+
+ # # global retrieved_documents
+ # # retrieved_documents = []
+
+ # # global response
+ # # response = ""
+
+ # # global time_taken_for_response
+ # # time_taken_for_response = 'N/A'
+
+ # # @st.cache_data
+ # # def load_data():
+ # # load_data_from_faiss()
+
+ # # data_status = load_data()
+
+ # # Question Section
+ # st.subheader("Hi, What do you want to know today?")
+ # question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)
+
+ # # # Submit Button
+ # # if st.button("Submit"):
+ # # start_time = time.time()
+ # # retrieved_documents = retrieve_documents_hybrid(question, 10)
+ # # response = generate_response_from_document(question, retrieved_documents)
+ # # end_time = time.time()
+ # # time_taken_for_response = end_time-start_time
+ # # else:
+ # # response = ""
+
+ # # # Response Section
+ # # st.subheader("Response")
+ # # st.text_area("Generated Response:", value=response, height=150, disabled=True)
+
+ # # # Metrics Section
+ # # st.subheader("Metrics")
+
+ # # col1, col2 = st.columns([1, 3]) # Creating two columns for button and metrics display
+
+ # # with col1:
+ # # if st.button("Calculate Metrics"):
+ # # metrics = calculate_metrics(question, response, retrieved_documents, time_taken_for_response)
+ # # else:
+ # # metrics = ""
+
+ # # with col2:
+ # # st.text_area("Metrics:", value=metrics, height=100, disabled=True)
+
+ # if "retrieved_documents" not in st.session_state:
+ # st.session_state.retrieved_documents = []
+ # if "response" not in st.session_state:
+ # st.session_state.response = ""
+ # if "time_taken_for_response" not in st.session_state:
+ # st.session_state.time_taken_for_response = "N/A"

  # # Submit Button
  # if st.button("Submit"):
  # start_time = time.time()
+ # st.session_state.retrieved_documents = retrieve_documents_hybrid(question, 10)
+ # st.session_state.response = generate_response_from_document(question, st.session_state.retrieved_documents)
  # end_time = time.time()
+ # st.session_state.time_taken_for_response = end_time - start_time

+ # # Display stored response
  # st.subheader("Response")
+ # st.text_area("Generated Response:", value=st.session_state.response, height=150, disabled=True)

  # col1, col2 = st.columns([1, 3]) # Creating two columns for button and metrics display

+ # # Calculate Metrics Button
  # with col1:
  # if st.button("Calculate Metrics"):
+ # metrics = calculate_metrics(question, st.session_state.response, st.session_state.retrieved_documents, st.session_state.time_taken_for_response)
  # else:
+ # metrics = {}

  # with col2:
+ # #st.text_area("Metrics:", value=metrics, height=100, disabled=True)
+ # st.json(metrics)
+
+ import streamlit as st
+ import plotly.express as px
+ import pandas as pd
+ from datasets import load_dataset, Dataset, DatasetDict
+ from generator import generate_response_from_document
+ from retrieval import retrieve_documents_hybrid
+ from evaluation import calculate_metrics
+ import time
+
+ # Hugging Face Dataset Details
+ HF_DATASET_REPO = "cb1716pics/23RAG7_recent_questions" # Hugging Face repo
+
+ # Load Dataset from Hugging Face
+ @st.cache_resource
+ def load_hf_dataset():
+     try:
+         return load_dataset(HF_DATASET_REPO)
+     except Exception:
+         return DatasetDict({"recent": Dataset.from_dict({"question": [], "response": [], "metrics": []})})
+
+ dataset = load_hf_dataset()
+
+ # Function to Save Data to Hugging Face Dataset
+ def save_to_hf_dataset(question, response, metrics):
+     global dataset
+     new_data = {
+         "question": [question],
+         "response": [response],
+         "metrics": [metrics]
+     }
+
+     # Convert existing dataset to a dict and append new data
+     dataset_dict = dataset["recent"].to_dict()
+     for key in new_data.keys():
+         dataset_dict[key] = dataset_dict.get(key, []) + new_data[key]
+
+     # Keep only the last 10 entries
+     for key in dataset_dict.keys():
+         dataset_dict[key] = dataset_dict[key][-10:]
+
+     # Convert back to dataset and push to Hugging Face
+     dataset["recent"] = Dataset.from_dict(dataset_dict)
+     dataset.push_to_hub(HF_DATASET_REPO)
+
+ # Streamlit UI
+ st.title("🔍 RAG7 - Real World RAG System")
+
+ # Sidebar - Recent Questions
+ st.sidebar.header("📌 Recent Questions")
+ if len(dataset["recent"]) > 0:
+     for q in dataset["recent"]["question"][-10:]:
+         st.sidebar.write(f"🔹 {q}")
+
+ # Sidebar - Analytics with Graph
+ st.sidebar.header("📊 Analytics Overview")
+ if len(dataset["recent"]) > 0:
+     # Extract recent metrics for visualization
+     metrics_data = dataset["recent"]["metrics"][-10:]
+     metrics_keys = ["context_relevance", "context_utilization", "completeness", "adherence"]
+
+     # Prepare a dictionary for graphing
+     graph_data = {key: [m[key] for m in metrics_data] for key in metrics_keys}
+     graph_data["Question #"] = list(range(1, len(metrics_data) + 1))
+
+     # Convert to DataFrame for Plotly
+     df = pd.DataFrame(graph_data)
+
+     # Plot Metrics Over Time
+     fig = px.line(df, x="Question #", y=metrics_keys,
+                   labels={"value": "Score", "variable": "Metric"},
+                   title="📈 Model Performance Over Recent Questions")
+     st.sidebar.plotly_chart(fig, use_container_width=True)
+
+ # Evaluate Button
+ if st.sidebar.button("⚡ Evaluate RAG Model"):
+     st.sidebar.success("✅ Model Evaluation Triggered!")
+
+ # Main Section - User Input
+ st.subheader("💬 Ask a Question")
+ question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)

  # Submit Button
+ if st.button("🚀 Submit"):
      start_time = time.time()
+     retrieved_documents = retrieve_documents_hybrid(question, 10)
+     response = generate_response_from_document(question, retrieved_documents)
      end_time = time.time()
+     time_taken_for_response = end_time - start_time

+     # Calculate Metrics
+     metrics = calculate_metrics(question, response, retrieved_documents, time_taken_for_response)

+     # Save Data
+     save_to_hf_dataset(question, response, metrics)

+     # Display Response
+     st.subheader("💡 Response")
+     st.text_area("Generated Response:", value=response, height=150, disabled=True)

+     # Display Metrics with Bar Chart
+     st.subheader("📊 Metrics")
      st.json(metrics)
+
+     # Plot Bar Chart for Metrics
+     metric_df = pd.DataFrame({"Metric": list(metrics.keys()), "Score": list(metrics.values())})
+     fig2 = px.bar(metric_df, x="Metric", y="Score", title="📊 Current Query Metrics")
+     st.plotly_chart(fig2, use_container_width=True)
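
Note on the new persistence logic: save_to_hf_dataset pushes the updated "recent" split back to the Hub on every submit, so the deployment needs Hugging Face credentials with write access to cb1716pics/23RAG7_recent_questions (likely the reason huggingface_hub was added to requirements.txt). A minimal sketch of reading that history back for inspection, assuming the dataset repo is readable from your environment; this snippet is illustrative and not part of the commit:

# Sketch: inspect the question history that save_to_hf_dataset appends to (last 10 entries)
from datasets import load_dataset

history = load_dataset("cb1716pics/23RAG7_recent_questions")["recent"]
for question, metrics in zip(history["question"], history["metrics"]):
    print(question, metrics)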
requirements.txt CHANGED
@@ -15,4 +15,5 @@ nltk
  requests
  rouge-score
  numpy
- rank_bm25
+ plotly
+ huggingface_hub