DrishtiSharma committed on
Commit
3570753
·
verified ·
1 Parent(s): ec7ddd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -11
app.py CHANGED
@@ -28,33 +28,42 @@ class DocumentRAG:
28
  if not self.api_key:
29
  raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
30
 
31
- def process_documents(self, file_paths):
 
32
  if not self.api_key:
33
  return "Please set the OpenAI API key in the environment variables."
34
- if not file_paths:
35
  return "Please upload documents first."
36
 
37
  try:
38
  documents = []
39
- for file_path in file_paths:
40
- if file_path.name.endswith('.pdf'):
41
- loader = PyPDFLoader(file_path.name)
42
- elif file_path.name.endswith('.txt'):
43
- loader = TextLoader(file_path.name)
44
- elif file_path.name.endswith('.csv'):
45
- loader = CSVLoader(file_path.name)
 
 
 
 
 
 
46
  else:
47
  continue
48
 
 
49
  try:
50
  documents.extend(loader.load())
51
  except Exception as e:
52
- print(f"Error loading {file_path.name}: {str(e)}")
53
  continue
54
 
55
  if not documents:
56
  return "No valid documents were processed. Please check your files."
57
 
 
58
  text_splitter = RecursiveCharacterTextSplitter(
59
  chunk_size=1000,
60
  chunk_overlap=200,
@@ -62,9 +71,11 @@ class DocumentRAG:
62
  )
63
  documents = text_splitter.split_documents(documents)
64
 
 
65
  combined_text = " ".join([doc.page_content for doc in documents])
66
  self.document_summary = self.generate_summary(combined_text)
67
 
 
68
  embeddings = OpenAIEmbeddings(api_key=self.api_key)
69
  self.document_store = Chroma.from_documents(documents, embeddings)
70
  self.qa_chain = ConversationalRetrievalChain.from_llm(
@@ -80,7 +91,7 @@ class DocumentRAG:
80
  return f"Error processing documents: {str(e)}"
81
 
82
  def generate_summary(self, text):
83
- """Generate a summary of the uploaded documents."""
84
  if not self.api_key:
85
  return "API Key not set. Please set it in the environment variables."
86
  try:
 
28
  if not self.api_key:
29
  raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
30
 
31
+ def process_documents(self, uploaded_files):
32
+ """Process uploaded files by saving them temporarily and extracting content."""
33
  if not self.api_key:
34
  return "Please set the OpenAI API key in the environment variables."
35
+ if not uploaded_files:
36
  return "Please upload documents first."
37
 
38
  try:
39
  documents = []
40
+ for uploaded_file in uploaded_files:
41
+ # Save uploaded file to a temporary location
42
+ temp_file_path = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]).name
43
+ with open(temp_file_path, "wb") as temp_file:
44
+ temp_file.write(uploaded_file.read())
45
+
46
+ # Determine the loader based on the file type
47
+ if temp_file_path.endswith('.pdf'):
48
+ loader = PyPDFLoader(temp_file_path)
49
+ elif temp_file_path.endswith('.txt'):
50
+ loader = TextLoader(temp_file_path)
51
+ elif temp_file_path.endswith('.csv'):
52
+ loader = CSVLoader(temp_file_path)
53
  else:
54
  continue
55
 
56
+ # Load the documents
57
  try:
58
  documents.extend(loader.load())
59
  except Exception as e:
60
+ print(f"Error loading {temp_file_path}: {str(e)}")
61
  continue
62
 
63
  if not documents:
64
  return "No valid documents were processed. Please check your files."
65
 
66
+ # Split text for better processing
67
  text_splitter = RecursiveCharacterTextSplitter(
68
  chunk_size=1000,
69
  chunk_overlap=200,
 
71
  )
72
  documents = text_splitter.split_documents(documents)
73
 
74
+ # Combine text for summary
75
  combined_text = " ".join([doc.page_content for doc in documents])
76
  self.document_summary = self.generate_summary(combined_text)
77
 
78
+ # Create embeddings and initialize retrieval chain
79
  embeddings = OpenAIEmbeddings(api_key=self.api_key)
80
  self.document_store = Chroma.from_documents(documents, embeddings)
81
  self.qa_chain = ConversationalRetrievalChain.from_llm(
 
91
  return f"Error processing documents: {str(e)}"
92
 
93
  def generate_summary(self, text):
94
+ """Generate a summary of the provided text."""
95
  if not self.api_key:
96
  return "API Key not set. Please set it in the environment variables."
97
  try: