Upload 3 files
- data_processing.py +8 -1
- evaluation.py +3 -3
data_processing.py
CHANGED
@@ -17,6 +17,7 @@ embedding_model = HuggingFaceEmbeddings(
 all_documents = []
 index = None
 actual_docs = None
+ragbench = {}


 def create_faiss_index_file():
@@ -52,9 +53,15 @@ def create_faiss_index_file():
     print(f"data is stored!")

 def load_data_from_faiss():
+    load_ragbench()
     load_faiss()
     load_metatdata()

+def load_ragbench():
+    ragbench = {}
+    for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa', 'techqa']:
+        ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
+
 def load_faiss():
     # Load the correct FAISS index
     faiss_index_path = f"data_local\rag7_index.faiss"
@@ -63,4 +70,4 @@ def load_faiss():
 def load_metatdata():
     # Load document metadata
     with open(f"data_local\rag7_docs.json", "r") as f:
-    actual_docs = json.load(f) # Contains all documents for this dataset
+        actual_docs = json.load(f) # Contains all documents for this dataset
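Note on the new loading path: below is a minimal runnable sketch of what data_processing.py amounts to after this commit. The RagBench subset list and the "rungalileo/ragbench" dataset name are taken from the diff; everything else (the DATA_DIR constant, the use of `global`, the faiss.read_index call) is an assumption for illustration, not the Space's exact code. Two caveats visible in the diff itself: paths like f"data_local\rag7_index.faiss" embed a literal \r (carriage-return) escape, which os.path.join sidesteps, and load_ragbench / load_metatdata assign ragbench and actual_docs without `global`, so those assignments only bind locals; the sketch declares them global so callers can see the loaded data.

import json
import os

import faiss                       # faiss-cpu; assumed from the .faiss index file
from datasets import load_dataset  # Hugging Face `datasets`

# Module-level state, as in the commit
all_documents = []
index = None
actual_docs = None
ragbench = {}

DATA_DIR = "data_local"  # hypothetical constant; the commit hard-codes the paths


def load_ragbench():
    """Download every RagBench subset named in the commit."""
    global ragbench  # without this, `ragbench = {}` only rebinds a local
    for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa',
                    'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa', 'techqa']:
        ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)


def load_faiss():
    """Load the serialized FAISS index from disk."""
    global index
    # os.path.join avoids the \r escape hidden in "data_local\rag7_index.faiss"
    faiss_index_path = os.path.join(DATA_DIR, "rag7_index.faiss")
    index = faiss.read_index(faiss_index_path)


def load_metatdata():  # (sic) name kept as committed
    """Load document metadata; contains all documents for this dataset."""
    global actual_docs
    with open(os.path.join(DATA_DIR, "rag7_docs.json"), "r") as f:
        actual_docs = json.load(f)


def load_data_from_faiss():
    load_ragbench()
    load_faiss()
    load_metatdata()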
evaluation.py
CHANGED
@@ -19,10 +19,10 @@ def calculate_metrics(question, response, docs,data):
     }
     return predicted_metrics

-def retrieve_ground_truths(question,
+def retrieve_ground_truths(question,ragbench_set):
     # Iterate through all splits (train, test, validation)
-    for dataset_name in
-        for split_name,instances in
+    for dataset_name in ragbench_set.keys():
+        for split_name,instances in ragbench_set[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
             print(f"Processing {split_name} split")
             for instance in instances: # Fixed: Corrected indentation
                 # Check if the question (data) matches the query
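The removed lines above are truncated in the diff view, and the hunk ends before the body of the loop, so the matching logic is not visible in this commit. As a hedged sketch only, here is how the new retrieve_ground_truths(question, ragbench_set) signature would plausibly be completed; the "question" and "documents" field names and the return value are assumptions, not code from this commit.

def retrieve_ground_truths(question, ragbench_set):
    # Iterate through all splits (train, test, validation) of every subset
    for dataset_name in ragbench_set.keys():
        for split_name, instances in ragbench_set[dataset_name].items():
            print(f"Processing {split_name} split")
            for instance in instances:
                # Check if the question (data) matches the query; the field
                # names below are assumed, not shown in the diff
                if instance.get("question") == question:
                    return instance.get("documents")
    return None  # assumed fallback when no split contains the question

With load_ragbench() populating the module-level ragbench dict in data_processing.py, a caller would pass that dict here as ragbench_set.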