cb1716pics committed
Commit 9665824 · verified · 1 Parent(s): da626d3

Upload 3 files

Files changed (3):
  1. app.py +8 -10
  2. data_processing.py +26 -4
  3. evaluation.py +16 -17
app.py CHANGED
@@ -64,20 +64,10 @@ if "time_taken_for_response" not in st.session_state:
 if "metrics" not in st.session_state:
     st.session_state.metrics = {}
 
-# Streamlit Sidebar for Recent Questions
-st.sidebar.title("Recent Questions")
-
 recent_data = load_recent_questions()
-for q in reversed(recent_data["questions"]):  # Show latest first
-    st.sidebar.write(f"🔹 {q['question']}")
-    st.json({q['metrics']})
-
-st.sidebar.markdown("---")  # Separator
-
 
 import matplotlib.pyplot as plt
 
-
 # for visualization
 st.sidebar.title("Analytics")
 
@@ -93,6 +83,14 @@ ax.set_ylabel("Time Taken for Response")
 ax.legend()
 st.sidebar.pyplot(fig)
 
+st.sidebar.markdown("---")  # Separator
+
+# Streamlit Sidebar for Recent Questions
+st.sidebar.title("Recent Questions")
+for q in reversed(recent_data["questions"]):  # Show latest first
+    with st.expander(f"🔹 {q['question']}"):
+        st.json(q["metrics"])
+
 # Submit Button
 # if st.button("Submit"):
 #     start_time = time.time()
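
Review note: the new sidebar block assumes load_recent_questions() always returns a dict with a "questions" list, even on a fresh deployment. A minimal sketch of that contract (RECENT_QUESTIONS_FILE comes from data_processing.py; its value and the empty-state fallback here are assumptions, not this commit's code):

# Sketch only: the shape of the data the new sidebar loop relies on.
import json
import os

RECENT_QUESTIONS_FILE = "recent_questions.json"  # assumed path

def load_recent_questions():
    if os.path.exists(RECENT_QUESTIONS_FILE):
        with open(RECENT_QUESTIONS_FILE) as file:
            return json.load(file)
    # Empty state keeps reversed(recent_data["questions"]) from failing
    return {"questions": []}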
data_processing.py CHANGED
@@ -95,11 +95,11 @@ def load_ragbench():
 
 def load_query_dataset(query_dataset):
     global query_dataset_data
-    if query_dataset_data:
-        return query_dataset_data
+    if query_dataset_data[query_dataset]:
+        return query_dataset_data[query_dataset]
     else:
-        query_dataset_data = load_dataset("rungalileo/ragbench", query_dataset)
-        return query_dataset_data
+        query_dataset_data[query_dataset] = load_dataset("rungalileo/ragbench", query_dataset)
+        return query_dataset_data[query_dataset]
 
 def load_faiss(query_dataset):
     global index
@@ -149,5 +149,27 @@ def save_recent_question(question, metrics):
     data["questions"] = data["questions"][-5:]
 
     # Write back to file
+    with open(RECENT_QUESTIONS_FILE, "w") as file:
+        json.dump(data, file, indent=4)
+
+# Function to save/update a question in the recent list
+def save_recent_question(question, metrics):
+    data = load_recent_questions()
+
+    # Check if the question already exists
+    existing_questions = {q["question"]: q for q in data["questions"]}
+
+    if question in existing_questions:
+        # Update metrics & move to the latest position
+        existing_questions[question]["metrics"] = metrics
+        data["questions"].remove(existing_questions[question])  # Remove old entry
+
+    # Add the updated/new question at the latest position
+    data["questions"].append({"question": question, "metrics": metrics})
+
+    # Keep only the last 10 questions
+    data["questions"] = data["questions"][-5:]
+
+    # Write back to the file
     with open(RECENT_QUESTIONS_FILE, "w") as file:
         json.dump(data, file, indent=4)
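
Review note: as written, the new lookup query_dataset_data[query_dataset] raises KeyError the first time a subset is requested, so the else branch is unreachable; the trailing comment also says "last 10 questions" while the slice keeps 5. A more defensive memoization sketch, assuming query_dataset_data is a module-level dict:

from datasets import load_dataset

query_dataset_data = {}  # module-level cache: subset name -> loaded dataset

def load_query_dataset(query_dataset):
    # dict.get sidesteps the KeyError a plain index raises on a cache miss
    if query_dataset_data.get(query_dataset) is None:
        query_dataset_data[query_dataset] = load_dataset("rungalileo/ragbench", query_dataset)
    return query_dataset_data[query_dataset]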
evaluation.py CHANGED
@@ -111,22 +111,21 @@ def calculate_metrics(question, response, docs, time_taken):
     }
     return predicted_metrics
 
-def retrieve_ground_truths(question, ragbench_set):
-    for dataset_name in ragbench_set.keys():
-        for split_name, instances in ragbench_set[dataset_name].items():
-            print(f"Processing {split_name} split")
-            for instance in instances:
-                if instance['question'] == question:
-                    instance_id = instance['id']
-                    instance_response = instance['response']
-                    # ground_truth_metrics = {
-                    #     "context_relevance": instance['relevance_score'],
-                    #     "context_utilization": instance['utilization_score'],
-                    #     "completeness": instance['completeness_score'],
-                    #     "adherence": instance['adherence_score']
-                    # }
-                    print(f"Match found in {split_name} split!")
-                    print(f"ID: {instance_id}, Response: {instance_response}")
-                    return instance_response  # Return ground truth response immediately
+def retrieve_ground_truths(question, dataset):
+    for split_name, instances in dataset.items():
+        print(f"Processing {split_name} split")
+        for instance in instances:
+            if instance['question'] == question:
+                instance_id = instance['id']
+                instance_response = instance['response']
+                # ground_truth_metrics = {
+                #     "context_relevance": instance['relevance_score'],
+                #     "context_utilization": instance['utilization_score'],
+                #     "completeness": instance['completeness_score'],
+                #     "adherence": instance['adherence_score']
+                # }
+                print(f"Match found in {split_name} split!")
+                print(f"ID: {instance_id}, Response: {instance_response}")
+                return instance_response  # Return ground truth response immediately
 
     return None  # Return None if no match is found
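
Review note: this refactor narrows retrieve_ground_truths() from scanning the whole RAGBench collection to a single pre-loaded subset, so callers now pair it with load_query_dataset(). A hedged usage sketch (the subset name and question string are illustrative only):

dataset = load_query_dataset("hotpotqa")  # illustrative subset name
ground_truth = retrieve_ground_truths("Who wrote Hamlet?", dataset)  # illustrative question
if ground_truth is None:
    print("No matching instance found in any split")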