cb1716pics committed
Commit 7c78daa · verified · 1 Parent(s): 599d161

Upload data_processing.py

Files changed (1)
  1. data_processing.py +7 -7
data_processing.py CHANGED
@@ -101,18 +101,18 @@ def load_query_dataset(q_dataset):
     query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
     return query_dataset_data[q_dataset]
 
-def load_faiss(query_dataset):
+def load_faiss(q_dataset):
     global index
-    faiss_index_path = f"data_local/{query_dataset}_quantized.faiss"
+    faiss_index_path = f"data_local/{q_dataset}_quantized.faiss"
     if os.path.exists(faiss_index_path):
         index = faiss.read_index(faiss_index_path)
         print("FAISS index loaded successfully.")
     else:
         print("FAISS index file not found. Run create_faiss_index_file() first.")
 
-def load_chunks(query_dataset):
+def load_chunks(q_dataset):
     global chunk_docs
-    metadata_path = f"data_local/{query_dataset}_chunked_docs.json"
+    metadata_path = f"data_local/{q_dataset}_chunked_docs.json"
     if os.path.exists(metadata_path):
         with open(metadata_path, "r") as f:
             chunk_docs = json.load(f)
@@ -120,9 +120,9 @@ def load_chunks(query_dataset):
     else:
         print("Metadata file not found. Run create_faiss_index_file() first.")
 
-def load_data_from_faiss(query_dataset):
-    load_faiss(query_dataset)
-    load_chunks(query_dataset)
+def load_data_from_faiss(q_dataset):
+    load_faiss(q_dataset)
+    load_chunks(q_dataset)
 
 def rerank_documents(query, retrieved_docs):
     doc_texts = [doc for doc in retrieved_docs]
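
For reviewers who want to exercise the renamed loaders, here is a minimal sketch. It assumes data_processing.py is importable as a module, that create_faiss_index_file() has already written data_local/<q_dataset>_quantized.faiss and data_local/<q_dataset>_chunked_docs.json, and that the index was built from sentence-transformer embeddings; the embedding model name and the "hotpotqa" subset are illustrative assumptions, not taken from this commit.

# Sketch only: exercises load_data_from_faiss() and rerank_documents()
# from this commit. Model name and dataset subset are assumptions.
import numpy as np
from sentence_transformers import SentenceTransformer

import data_processing as dp

q_dataset = "hotpotqa"              # one of the rungalileo/ragbench subsets
dp.load_data_from_faiss(q_dataset)  # populates module globals index and chunk_docs

# Embed the query; the model here is assumed, for illustration only. It must
# match whatever model produced the vectors when the index was built.
model = SentenceTransformer("all-MiniLM-L6-v2")
query = "What are the side effects of ibuprofen?"
query_vec = np.asarray(model.encode([query]), dtype=np.float32)

# Retrieve the 5 nearest chunks from the quantized FAISS index, then rerank.
_, ids = dp.index.search(query_vec, 5)
retrieved_docs = [dp.chunk_docs[i] for i in ids[0]]
reranked = dp.rerank_documents(query, retrieved_docs)

Note that the query-time embedding model has to agree with the one used at index-build time; that detail lives in create_faiss_index_file(), which is outside this diff.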