cb1716pics committed on
Commit
43b460f
·
verified ·
1 Parent(s): 1b04b96

Upload 3 files

Browse files
Files changed (2) hide show
  1. data_processing.py +8 -1
  2. evaluation.py +3 -3
data_processing.py CHANGED
@@ -17,6 +17,7 @@ embedding_model = HuggingFaceEmbeddings(
17
  all_documents = []
18
  index = None
19
  actual_docs = None
 
20
 
21
 
22
  def create_faiss_index_file():
@@ -52,9 +53,15 @@ def create_faiss_index_file():
52
  print(f"data is stored!")
53
 
54
  def load_data_from_faiss():
 
55
  load_faiss()
56
  load_metatdata()
57
 
 
 
 
 
 
58
  def load_faiss():
59
  # Load the correct FAISS index
60
  faiss_index_path = f"data_local\rag7_index.faiss"
@@ -63,4 +70,4 @@ def load_faiss():
63
  def load_metatdata():
64
  # Load document metadata
65
  with open(f"data_local\rag7_docs.json", "r") as f:
66
- actual_docs = json.load(f) # Contains all documents for this dataset
 
17
  all_documents = []
18
  index = None
19
  actual_docs = None
20
+ ragbench = {}
21
 
22
 
23
  def create_faiss_index_file():
 
53
  print(f"data is stored!")
54
 
55
def load_data_from_faiss():
    """Load everything retrieval needs, in order: the ragbench datasets,
    the FAISS index, and the document metadata.

    Each step populates module-level state (``ragbench``, ``index``,
    ``actual_docs``); nothing is returned.
    """
    load_ragbench()
    load_faiss()
    load_metatdata()  # NOTE(review): helper name is typo'd ("metatdata") in source; kept for compatibility
59
 
60
def load_ragbench():
    """Populate the module-level ``ragbench`` dict with every ragbench subset.

    Downloads (or reads from cache) each dataset config of
    ``rungalileo/ragbench`` via ``datasets.load_dataset`` and stores it
    keyed by config name.

    Bug fix: the original assigned to a *local* ``ragbench``, shadowing the
    module-level dict declared at the top of the file, so every loaded
    dataset was silently discarded when the function returned. Declaring
    ``global ragbench`` makes the results visible to the rest of the module
    (e.g. ``retrieve_ground_truths`` in evaluation.py).
    """
    global ragbench
    ragbench = {}
    for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa',
                    'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa',
                    'tatqa', 'techqa']:
        # load_dataset comes from the `datasets` package imported elsewhere in the file.
        ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
65
  def load_faiss():
66
  # Load the correct FAISS index
67
  faiss_index_path = f"data_local\rag7_index.faiss"
 
70
def load_metatdata():
    """Load document metadata from data_local/rag7_docs.json into the
    module-level ``actual_docs`` and also return it.

    Fixes:
    - The original path literal ``"data_local\\rag7_docs.json"`` contained
      the escape sequence ``\\r`` (carriage return), yielding a broken
      filename on every OS; a forward slash is valid on both Windows and
      POSIX.
    - ``actual_docs`` was bound as a local, so the module-level variable
      was never populated; ``global`` makes the load effective.
    - Returning the loaded data is backward compatible (the original
      returned ``None``, which no caller could use) and makes the function
      directly testable.
    """
    global actual_docs
    with open("data_local/rag7_docs.json", "r") as f:
        actual_docs = json.load(f)  # contains all documents for this dataset
    return actual_docs
evaluation.py CHANGED
@@ -19,10 +19,10 @@ def calculate_metrics(question, response, docs,data):
19
  }
20
  return predicted_metrics
21
 
22
- def retrieve_ground_truths(question,ragbench):
23
  # Iterate through all splits (train, test, validation)
24
- for dataset_name in ragbench.keys():
25
- for split_name,instances in ragbench[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
26
  print(f"Processing {split_name} split")
27
  for instance in instances: # Fixed: Corrected indentation
28
  # Check if the question (data) matches the query
 
19
  }
20
  return predicted_metrics
21
 
22
+ def retrieve_ground_truths(question,ragbench_set):
23
  # Iterate through all splits (train, test, validation)
24
+ for dataset_name in ragbench_set.keys():
25
+ for split_name,instances in ragbench_set[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
26
  print(f"Processing {split_name} split")
27
  for instance in instances: # Fixed: Corrected indentation
28
  # Check if the question (data) matches the query