Spaces:
Sleeping
Sleeping
Update Functions.py
Browse files- Functions.py +11 -8
Functions.py
CHANGED
@@ -170,20 +170,23 @@ class RFPProcessor:
|
|
170 |
)
|
171 |
st.session_state["is_estimation_data_created"] = True
|
172 |
|
173 |
-
def process_rfp_data(self, project_name,
|
174 |
-
if project_name and
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
181 |
# loader = PdfReader(file)
|
182 |
# for i, page in enumerate(loader.pages):
|
183 |
# content = page.extract_text()
|
184 |
# if content:
|
185 |
# temp = st.session_state["rfp_details"]
|
186 |
# st.session_state["rfp_details"] = temp + content
|
|
|
187 |
|
188 |
text_splitter = CharacterTextSplitter(
|
189 |
separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
|
|
|
170 |
)
|
171 |
st.session_state["is_estimation_data_created"] = True
|
172 |
|
173 |
+
def process_rfp_data(self, project_name, files):
|
174 |
+
if project_name and files:
|
175 |
+
extracted_data = []
|
176 |
+
for file in files:
|
177 |
+
if file.name.endswith(".docx"):
|
178 |
+
extracted_data.append(read_docx(file))
|
179 |
+
elif file.name.endswith(".pdf"):
|
180 |
+
extracted_data.append(get_pdf_text(file))
|
181 |
+
else:
|
182 |
+
extracted_data.append(extract_text_from_file(file))
|
183 |
# loader = PdfReader(file)
|
184 |
# for i, page in enumerate(loader.pages):
|
185 |
# content = page.extract_text()
|
186 |
# if content:
|
187 |
# temp = st.session_state["rfp_details"]
|
188 |
# st.session_state["rfp_details"] = temp + content
|
189 |
+
st.session_state["rfp_details"] = " ".join(extracted_data)
|
190 |
|
191 |
text_splitter = CharacterTextSplitter(
|
192 |
separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
|