Spaces:
Sleeping
Sleeping
Upload data_processing.py
Browse files- data_processing.py +7 -7
data_processing.py
CHANGED
@@ -101,18 +101,18 @@ def load_query_dataset(q_dataset):
|
|
101 |
query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
|
102 |
return query_dataset_data[q_dataset]
|
103 |
|
104 |
-
def load_faiss(query_dataset):
|
105 |
global index
|
106 |
-
faiss_index_path = f"data_local/{query_dataset}_quantized.faiss"
|
107 |
if os.path.exists(faiss_index_path):
|
108 |
index = faiss.read_index(faiss_index_path)
|
109 |
print("FAISS index loaded successfully.")
|
110 |
else:
|
111 |
print("FAISS index file not found. Run create_faiss_index_file() first.")
|
112 |
|
113 |
-
def load_chunks(query_dataset):
|
114 |
global chunk_docs
|
115 |
-
metadata_path = f"data_local/{query_dataset}_chunked_docs.json"
|
116 |
if os.path.exists(metadata_path):
|
117 |
with open(metadata_path, "r") as f:
|
118 |
chunk_docs = json.load(f)
|
@@ -120,9 +120,9 @@ def load_chunks(query_dataset):
|
|
120 |
else:
|
121 |
print("Metadata file not found. Run create_faiss_index_file() first.")
|
122 |
|
123 |
-
def load_data_from_faiss(query_dataset):
|
124 |
-
load_faiss(query_dataset)
|
125 |
-
load_chunks(query_dataset)
|
126 |
|
127 |
def rerank_documents(query, retrieved_docs):
|
128 |
doc_texts = [doc for doc in retrieved_docs]
|
|
|
101 |
query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
|
102 |
return query_dataset_data[q_dataset]
|
103 |
|
104 |
+
def load_faiss(q_dataset):
|
105 |
global index
|
106 |
+
faiss_index_path = f"data_local/{q_dataset}_quantized.faiss"
|
107 |
if os.path.exists(faiss_index_path):
|
108 |
index = faiss.read_index(faiss_index_path)
|
109 |
print("FAISS index loaded successfully.")
|
110 |
else:
|
111 |
print("FAISS index file not found. Run create_faiss_index_file() first.")
|
112 |
|
113 |
+
def load_chunks(q_dataset):
|
114 |
global chunk_docs
|
115 |
+
metadata_path = f"data_local/{q_dataset}_chunked_docs.json"
|
116 |
if os.path.exists(metadata_path):
|
117 |
with open(metadata_path, "r") as f:
|
118 |
chunk_docs = json.load(f)
|
|
|
120 |
else:
|
121 |
print("Metadata file not found. Run create_faiss_index_file() first.")
|
122 |
|
123 |
+
def load_data_from_faiss(q_dataset):
|
124 |
+
load_faiss(q_dataset)
|
125 |
+
load_chunks(q_dataset)
|
126 |
|
127 |
def rerank_documents(query, retrieved_docs):
|
128 |
doc_texts = [doc for doc in retrieved_docs]
|