cb1716pics committed
Commit 2d5dee0 · verified · 1 Parent(s): d346441

Upload 2 files

Files changed (2)
  1. app.py +5 -5
  2. retrieval.py +9 -7
app.py CHANGED

@@ -2,17 +2,17 @@ import streamlit as st
 from generator import generate_response_from_document
 from retrieval import retrieve_documents
 from evaluation import calculate_metrics
-from data_processing import load_data_from_faiss
+#from data_processing import load_data_from_faiss
 import time

 # Page Title
 st.title("RAG7 - Real World RAG System")

-@st.cache_data
-def load_data():
-    load_data_from_faiss()
+# @st.cache_data
+# def load_data():
+#     load_data_from_faiss()

-data_status = load_data()
+# data_status = load_data()

 time_taken_for_response = 'N/A'
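The commit comments the startup FAISS load out rather than deleting it. If the cached loader is ever restored, Streamlit's st.cache_resource is a better fit than st.cache_data, since a FAISS index is not a serializable value. A minimal sketch of that pattern (the load_faiss_index name is illustrative; the rag7_index.faiss path is taken from retrieval.py below):

import faiss
import streamlit as st

@st.cache_resource  # cache_resource handles unpicklable objects such as a FAISS index
def load_faiss_index(path: str = "rag7_index.faiss"):
    # Read the index once per server process; Streamlit reruns reuse the cached object
    return faiss.read_index(path)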
retrieval.py CHANGED

@@ -1,27 +1,29 @@
 import json
 import numpy as np
 from langchain.schema import Document
-from langchain.vectorstores import faiss
+import faiss

-from data_processing import embedding_model, index
+from data_processing import embedding_model #, index, actual_docs
+
+retrieved_docs = None

 # Retrieval Function
 def retrieve_documents(query, top_k=5):
-    # Embed the query
+    faiss_index_path = f"rag7_index.faiss"
+    index = faiss.read_index(faiss_index_path)
+
     query_embedding = np.array(embedding_model.embed_documents([query]), dtype=np.float32)

-    # Search in FAISS (top 5 results)
     _, nearest_indices = index.search(query_embedding, top_k)

-    # Load document metadata
-    with open(f"data_local\rag7_docs.json", "r") as f:
+    with open(f"rag7_docs.json", "r") as f:
         documents = json.load(f)  # Contains all documents for this dataset

-    # Retrieve the actual documents and create Document objects
     retrieved_docs = [Document(page_content=documents[i]) for i in nearest_indices[0]]

     return retrieved_docs

+
 def remove_duplicate_documents(documents):
     unique_documents = []
     seen_documents = set()  # To keep track of seen documents
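For reference, a minimal usage sketch of the reworked retriever, assuming rag7_index.faiss and rag7_docs.json sit in the working directory as the hard-coded paths above require (the query string is illustrative):

from retrieval import retrieve_documents, remove_duplicate_documents

# Fetch the top-5 nearest documents, then drop duplicates.
# remove_duplicate_documents' loop body is truncated in this hunk,
# but its setup suggests dedup by previously seen content.
docs = retrieve_documents("What is retrieval-augmented generation?", top_k=5)
unique_docs = remove_duplicate_documents(docs)
for doc in unique_docs:
    print(doc.page_content[:80])

Note that retrieve_documents now re-reads the index from disk on every call; caching the faiss.read_index result (as in the sketch after app.py) would avoid that repeated I/O. Also, embedding_model.embed_documents([query]) returns a list of vectors, so the resulting array already has the (1, d) shape that index.search expects.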