cb1716pics committed (verified)
Commit 75c991a · 1 Parent(s): 26ec43e

Upload 2 files

Files changed (2)
  1. app.py +1 -1
  2. evaluation.py +1 -36
app.py CHANGED
@@ -49,7 +49,7 @@ if st.session_state.recent_questions:
     # Display Recent Questions
     st.sidebar.title("Overall RMSE")
     rmse_values = [q["metrics"]["RMSE"] for q in recent_qns if "metrics" in q and "RMSE" in q["metrics"]]
-    if any(rmse_values):
+    if any(rmse_values) and len(rmse_values) > 0:
         average_rmse = sum(rmse_values) / len(rmse_values) if rmse_values else 0
         st.sidebar.write(f"📊 **Average RMSE:** {average_rmse:.4f} for {len(rmse_values)} questions")
 
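The guard above only decides whether the sidebar shows an average at all. Below is a minimal stand-alone sketch of the same averaging logic, runnable without Streamlit; recent_qns here is hypothetical sample data standing in for the session-state list, and print replaces st.sidebar.write.

# Stand-alone sketch of the sidebar average-RMSE logic (hypothetical data).
recent_qns = [
    {"question": "q1", "metrics": {"RMSE": 0.12}},
    {"question": "q2", "metrics": {"RMSE": 0.30}},
    {"question": "q3"},  # no metrics yet -> skipped by the comprehension
]

rmse_values = [q["metrics"]["RMSE"] for q in recent_qns
               if "metrics" in q and "RMSE" in q["metrics"]]

# A plain emptiness check also covers the edge case where every stored RMSE
# is exactly 0.0, which makes any(rmse_values) falsy even though values exist.
if rmse_values:
    average_rmse = sum(rmse_values) / len(rmse_values)
    print(f"Average RMSE: {average_rmse:.4f} for {len(rmse_values)} questions")
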
evaluation.py CHANGED
@@ -11,41 +11,6 @@ global ground_truth_answer, ground_truth_metrics
 ground_truth_answer = ''
 ground_truth_metrics = {}
 
-# def calculate_metrics(question, response, docs, time_taken):
-#     data = load_ragbench()
-#     retrieve_ground_truths(question, data)
-#     # Predicted metrics
-#     predicted_metrics = {
-#         "ground_truth": ground_truth_answer,
-#         "context_relevance": context_relevance(question, docs),
-#         "context_utilization": context_utilization(response, docs),
-#         "completeness": completeness(response, ground_truth_answer),
-#         "adherence": adherence(response, docs),
-#         "response_time" : time_taken
-#     }
-#     return predicted_metrics
-
-# def retrieve_ground_truths(question,ragbench_set):
-#     for dataset_name in ragbench_set.keys():
-#         for split_name,instances in ragbench_set[dataset_name].items():  # Fixed: Removed extra '.' and corrected indentation
-#             print(f"Processing {split_name} split")
-#             for instance in instances:  # Fixed: Corrected indentation
-#                 # Check if the question (data) matches the query
-#                 if instance['question'] == question:
-#                     # If a match is found, retrieve id and response
-#                     instance_id = instance['id']
-#                     instance_response = instance['response']
-#                     ground_truth_metrics = {
-#                         "context_relevance": instance['relevance_score'],
-#                         "context_utilization": instance['utilization_score'],
-#                         "completeness": instance['completeness_score'],
-#                         "adherence": instance['adherence_score']
-#                     }
-#                     ground_truth_answer = instance_response
-#                     print(f"Match found in {split_name} split!")
-#                     print(f"ID: {instance_id}, Response: {instance_response}")
-#                     break  # Exit after finding the first match (optional)
-
 # Step 1: Helper function to compute cosine similarity
 def compute_cosine_similarity(text1, text2):
     if not text1 or not text2:  # Check for empty or None values
@@ -109,7 +74,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         "adherence": adherence(response, docs),
     }
 
-    rmse = compute_rmse(predicted_metrics, ground_truth_metrics),
+    rmse = compute_rmse(predicted_metrics, ground_truth_metrics)
 
     metrics = {
         "RMSE": rmse,