Spaces:

cb1716pics
/

23RAG7

Sleeping

cb1716pics committed on Feb 21

Commit

7c78daa

verified ·

1 Parent(s): 599d161

Upload data_processing.py

Files changed (1) hide show

data_processing.py CHANGED Viewed

@@ -101,18 +101,18 @@ def load_query_dataset(q_dataset):
         query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
     return query_dataset_data[q_dataset]
-def load_faiss(query_dataset):
     global index
-    faiss_index_path = f"data_local/{query_dataset}_quantized.faiss"
     if os.path.exists(faiss_index_path):
         index = faiss.read_index(faiss_index_path)
         print("FAISS index loaded successfully.")
     else:
         print("FAISS index file not found. Run create_faiss_index_file() first.")
-def load_chunks(query_dataset):
     global chunk_docs
-    metadata_path = f"data_local/{query_dataset}_chunked_docs.json"
     if os.path.exists(metadata_path):
         with open(metadata_path, "r") as f:
             chunk_docs = json.load(f)
@@ -120,9 +120,9 @@ def load_chunks(query_dataset):
     else:
         print("Metadata file not found. Run create_faiss_index_file() first.")
-def load_data_from_faiss(query_dataset):
-    load_faiss(query_dataset)
-    load_chunks(query_dataset)
 def rerank_documents(query, retrieved_docs):
     doc_texts = [doc for doc in retrieved_docs]

         query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
     return query_dataset_data[q_dataset]
+def load_faiss(q_dataset):
     global index
+    faiss_index_path = f"data_local/{q_dataset}_quantized.faiss"
     if os.path.exists(faiss_index_path):
         index = faiss.read_index(faiss_index_path)
         print("FAISS index loaded successfully.")
     else:
         print("FAISS index file not found. Run create_faiss_index_file() first.")
+def load_chunks(q_dataset):
     global chunk_docs
+    metadata_path = f"data_local/{q_dataset}_chunked_docs.json"
     if os.path.exists(metadata_path):
         with open(metadata_path, "r") as f:
             chunk_docs = json.load(f)
     else:
         print("Metadata file not found. Run create_faiss_index_file() first.")
+def load_data_from_faiss(q_dataset):
+    load_faiss(q_dataset)
+    load_chunks(q_dataset)
 def rerank_documents(query, retrieved_docs):
     doc_texts = [doc for doc in retrieved_docs]