ngcanh committed on
Commit
9e46e0a
·
verified ·
1 Parent(s): b11685c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -31,12 +31,18 @@ class PDFChatbot:
31
  pdf_directory = "data"
32
 
33
  # Duyệt qua các file trong thư mục và đọc từng file PDF
34
- pdf_reader = PyPDF2.PdfReader(pdf_file)
35
- text = ""
36
- for page_num in range(len(pdf_reader.pages)):
37
- page = pdf_reader.pages[page_num]
38
- text += page.extract_text() + "\n"
39
- words = text.split()
 
 
 
 
 
 
40
  chunks = []
41
  current_chunk = []
42
  current_length = 0
 
31
  pdf_directory = "data"
32
 
33
  # Duyệt qua các file trong thư mục và đọc từng file PDF
34
+ for filename in os.listdir(pdf_directory):
35
+ if filename.lower().endswith(".pdf"):
36
+ pdf_path = os.path.join(pdf_directory, filename)
37
+ with open(pdf_path, "rb") as pdf_file:
38
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
39
+ text = ""
40
+ for page_num in range(len(pdf_reader.pages)):
41
+ page = pdf_reader.pages[page_num]
42
+ text += page.extract_text() + "\n"
43
+
44
+ # Optional: split into words
45
+ words = text.split()
46
  chunks = []
47
  current_chunk = []
48
  current_length = 0