Upload 3 files
- data_processing.py +8 -1
- evaluation.py +3 -3
data_processing.py
CHANGED
@@ -17,6 +17,7 @@ embedding_model = HuggingFaceEmbeddings(
 all_documents = []
 index = None
 actual_docs = None
+ragbench = {}


 def create_faiss_index_file():
@@ -52,9 +53,15 @@ def create_faiss_index_file():
     print(f"data is stored!")

 def load_data_from_faiss():
+    load_ragbench()
     load_faiss()
     load_metatdata()

+def load_ragbench():
+    ragbench = {}
+    for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa', 'techqa']:
+        ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
+
 def load_faiss():
     # Load the correct FAISS index
     faiss_index_path = f"data_local\rag7_index.faiss"
@@ -63,4 +70,4 @@ def load_faiss():
 def load_metatdata():
     # Load document metadata
     with open(f"data_local\rag7_docs.json", "r") as f:
-    actual_docs = json.load(f) # Contains all documents for this dataset
+        actual_docs = json.load(f) # Contains all documents for this dataset
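Note on the new loading path: below is a minimal runnable sketch of what data_processing.py amounts to after this commit. The RagBench subset list and the "rungalileo/ragbench" dataset name are taken from the diff; everything else (the DATA_DIR constant, the use of `global`, the faiss.read_index call) is an assumption for illustration, not the Space's exact code. Two caveats visible in the diff itself: paths like f"data_local\rag7_index.faiss" embed a literal \r (carriage-return) escape, which os.path.join sidesteps, and load_ragbench / load_metatdata assign ragbench and actual_docs without `global`, so those assignments only bind locals; the sketch declares them global so callers can see the loaded data.

import json
import os

import faiss                       # faiss-cpu; assumed from the .faiss index file
from datasets import load_dataset  # Hugging Face `datasets`

# Module-level state, as in the commit
all_documents = []
index = None
actual_docs = None
ragbench = {}

DATA_DIR = "data_local"  # hypothetical constant; the commit hard-codes the paths


def load_ragbench():
    """Download every RagBench subset named in the commit."""
    global ragbench  # without this, `ragbench = {}` only rebinds a local
    for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa',
                    'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa', 'techqa']:
        ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)


def load_faiss():
    """Load the serialized FAISS index from disk."""
    global index
    # os.path.join avoids the \r escape hidden in "data_local\rag7_index.faiss"
    faiss_index_path = os.path.join(DATA_DIR, "rag7_index.faiss")
    index = faiss.read_index(faiss_index_path)


def load_metatdata():  # (sic) name kept as committed
    """Load document metadata; contains all documents for this dataset."""
    global actual_docs
    with open(os.path.join(DATA_DIR, "rag7_docs.json"), "r") as f:
        actual_docs = json.load(f)


def load_data_from_faiss():
    load_ragbench()
    load_faiss()
    load_metatdata()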
evaluation.py
CHANGED
@@ -19,10 +19,10 @@ def calculate_metrics(question, response, docs,data):
     }
     return predicted_metrics

-def retrieve_ground_truths(question,
+def retrieve_ground_truths(question,ragbench_set):
     # Iterate through all splits (train, test, validation)
-    for dataset_name in
-        for split_name,instances in
+    for dataset_name in ragbench_set.keys():
+        for split_name,instances in ragbench_set[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
             print(f"Processing {split_name} split")
             for instance in instances: # Fixed: Corrected indentation
                 # Check if the question (data) matches the query
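The removed lines above are truncated in the diff view, and the hunk ends before the body of the loop, so the matching logic is not visible in this commit. As a hedged sketch only, here is how the new retrieve_ground_truths(question, ragbench_set) signature would plausibly be completed; the "question" and "documents" field names and the return value are assumptions, not code from this commit.

def retrieve_ground_truths(question, ragbench_set):
    # Iterate through all splits (train, test, validation) of every subset
    for dataset_name in ragbench_set.keys():
        for split_name, instances in ragbench_set[dataset_name].items():
            print(f"Processing {split_name} split")
            for instance in instances:
                # Check if the question (data) matches the query; the field
                # names below are assumed, not shown in the diff
                if instance.get("question") == question:
                    return instance.get("documents")
    return None  # assumed fallback when no split contains the question

With load_ragbench() populating the module-level ragbench dict in data_processing.py, a caller would pass that dict here as ragbench_set.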