Spaces:
Running
Running
Update Functions.py
Browse files- Functions.py +124 -36
Functions.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1 |
from dotenv import load_dotenv
|
2 |
import os
|
3 |
from PyPDF2 import PdfReader
|
|
|
|
|
|
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
5 |
from langchain_community.vectorstores import Chroma
|
6 |
from langchain_community.embeddings import OpenAIEmbeddings
|
@@ -12,6 +15,50 @@ import json
|
|
12 |
from Utils import estimate_to_value
|
13 |
from Utils import is_key_value_present
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
class RFPProcessor:
|
16 |
def __init__(self):
|
17 |
load_dotenv()
|
@@ -19,38 +66,73 @@ class RFPProcessor:
|
|
19 |
self.chains_obj = LLMChains()
|
20 |
|
21 |
def generate_roadmap(self):
|
22 |
-
|
23 |
-
roadmap_data = self.chains_obj.roadmap_chain.run(
|
|
|
|
|
24 |
print(roadmap_data)
|
25 |
st.session_state["roadmap_data_json"] = roadmap_data
|
26 |
data = json.loads(roadmap_data)
|
27 |
-
|
28 |
try:
|
29 |
-
|
30 |
-
|
31 |
except json.decoder.JSONDecodeError as e:
|
32 |
-
|
33 |
-
|
34 |
-
for phases_data in data[
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
st.session_state["is_roadmap_data_created"] = True
|
55 |
|
56 |
def generate_estimations(self, tech_leads, senior_developers, junior_developers):
|
@@ -64,9 +146,9 @@ class RFPProcessor:
|
|
64 |
}
|
65 |
|
66 |
data = self.chains_obj.estimations_chain.run(inputs)
|
67 |
-
|
68 |
-
st.session_state["estimation_data_json"]= data
|
69 |
-
|
70 |
estimation_json_data = json.loads(data)
|
71 |
|
72 |
for epic_data in estimation_json_data["epics"]:
|
@@ -90,12 +172,18 @@ class RFPProcessor:
|
|
90 |
|
91 |
def process_rfp_data(self, project_name, file):
|
92 |
if project_name and file:
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
text_splitter = CharacterTextSplitter(
|
101 |
separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
|
|
|
1 |
from dotenv import load_dotenv
|
2 |
import os
|
3 |
from PyPDF2 import PdfReader
|
4 |
+
from docx import Document
|
5 |
+
from docx.text.paragraph import Paragraph
|
6 |
+
from docx.table import Table
|
7 |
from langchain.text_splitter import CharacterTextSplitter
|
8 |
from langchain_community.vectorstores import Chroma
|
9 |
from langchain_community.embeddings import OpenAIEmbeddings
|
|
|
15 |
from Utils import estimate_to_value
|
16 |
from Utils import is_key_value_present
|
17 |
|
18 |
+
|
19 |
+
def extract_text_from_file(file):
    """Decode an uploaded file's raw bytes as UTF-8 text.

    Used as the fallback reader for uploads that are neither .docx nor
    .pdf (e.g. plain-text RFP documents).
    """
    raw_bytes = file.read()
    return raw_bytes.decode("utf-8")
|
22 |
+
|
23 |
+
|
24 |
+
def process_paragraph(paragraph):
    """Return the plain-text content of a docx paragraph element."""
    return paragraph.text
|
27 |
+
|
28 |
+
|
29 |
+
def process_table(table):
    """Concatenate the text of every cell in a docx table.

    Cells are visited in row-major order and joined with NO separator,
    matching the original behaviour; callers needing delimiters must
    post-process.  Uses a single ``str.join`` instead of the original
    repeated ``text += cell.text``, which builds quadratically.
    """
    return "".join(cell.text for row in table.rows for cell in row.cells)
|
37 |
+
|
38 |
+
|
39 |
+
def read_docx(file_path):
    """Extract all body text from a .docx document, newline-joined.

    Walks the document body in order, converting each Paragraph via
    ``process_paragraph`` and each Table via ``process_table``, so that
    tables appear at their true position between paragraphs.
    """
    document = Document(file_path)
    parts = []
    for element in document.iter_inner_content():
        if isinstance(element, Paragraph):
            parts.append(process_paragraph(element))
        elif isinstance(element, Table):
            parts.append(process_table(element))
    return "\n".join(parts)
|
50 |
+
|
51 |
+
|
52 |
+
def get_pdf_text(pdf):
    """This function extracts the text from the PDF file"""
    reader = PdfReader(pdf)
    pages = [page.extract_text() for page in reader.pages]
    return "\n".join(pages)
|
60 |
+
|
61 |
+
|
62 |
class RFPProcessor:
|
63 |
def __init__(self):
|
64 |
load_dotenv()
|
|
|
66 |
self.chains_obj = LLMChains()
|
67 |
|
68 |
def generate_roadmap(self):
|
69 |
+
|
70 |
+
roadmap_data = self.chains_obj.roadmap_chain.run(
|
71 |
+
{"project_input": st.session_state["estimation_data_json"]}
|
72 |
+
)
|
73 |
print(roadmap_data)
|
74 |
st.session_state["roadmap_data_json"] = roadmap_data
|
75 |
data = json.loads(roadmap_data)
|
76 |
+
|
77 |
try:
|
78 |
+
decoded_data = json.loads(roadmap_data)
|
79 |
+
print(decoded_data)
|
80 |
except json.decoder.JSONDecodeError as e:
|
81 |
+
print(f"JSON decoding error: {e}")
|
82 |
+
|
83 |
+
for phases_data in data["phases"]:
|
84 |
+
phase = phases_data["name"]
|
85 |
+
for milestone in phases_data["milestones"]:
|
86 |
+
milestone_name = milestone["name"]
|
87 |
+
total_time = milestone["totalTime"]
|
88 |
+
for feature in milestone["features"]:
|
89 |
+
featue_name = feature["name"]
|
90 |
+
featue_rationale = feature["rationale"]
|
91 |
+
featue_effort = feature["effort"]
|
92 |
+
phase_key_present = is_key_value_present(
|
93 |
+
st.session_state["roadmap_data"], "phases", phase
|
94 |
+
)
|
95 |
+
|
96 |
+
if phase_key_present:
|
97 |
+
milestone_key_present = is_key_value_present(
|
98 |
+
st.session_state["roadmap_data"],
|
99 |
+
"milestones",
|
100 |
+
milestone_name,
|
101 |
+
)
|
102 |
+
if milestone_key_present:
|
103 |
+
st.session_state.roadmap_data.append(
|
104 |
+
{
|
105 |
+
"phases": "",
|
106 |
+
"milestones": "",
|
107 |
+
"total_time": "",
|
108 |
+
"features": featue_name,
|
109 |
+
"rational": featue_rationale,
|
110 |
+
"effort": featue_effort,
|
111 |
+
}
|
112 |
+
)
|
113 |
+
else:
|
114 |
+
st.session_state.roadmap_data.append(
|
115 |
+
{
|
116 |
+
"phases": "",
|
117 |
+
"milestones": milestone_name,
|
118 |
+
"total_time": total_time,
|
119 |
+
"features": featue_name,
|
120 |
+
"rational": featue_rationale,
|
121 |
+
"effort": featue_effort,
|
122 |
+
}
|
123 |
+
)
|
124 |
+
else:
|
125 |
+
st.session_state.roadmap_data.append(
|
126 |
+
{
|
127 |
+
"phases": phase,
|
128 |
+
"milestones": milestone_name,
|
129 |
+
"total_time": total_time,
|
130 |
+
"features": featue_name,
|
131 |
+
"rational": featue_rationale,
|
132 |
+
"effort": featue_effort,
|
133 |
+
}
|
134 |
+
)
|
135 |
+
|
136 |
st.session_state["is_roadmap_data_created"] = True
|
137 |
|
138 |
def generate_estimations(self, tech_leads, senior_developers, junior_developers):
|
|
|
146 |
}
|
147 |
|
148 |
data = self.chains_obj.estimations_chain.run(inputs)
|
149 |
+
|
150 |
+
st.session_state["estimation_data_json"] = data
|
151 |
+
|
152 |
estimation_json_data = json.loads(data)
|
153 |
|
154 |
for epic_data in estimation_json_data["epics"]:
|
|
|
172 |
|
173 |
def process_rfp_data(self, project_name, file):
|
174 |
if project_name and file:
|
175 |
+
if file.name.endswith(".docx"):
|
176 |
+
st.session_state["rfp_details"] = read_docx(file)
|
177 |
+
elif file.name.endswith(".pdf"):
|
178 |
+
st.session_state["rfp_details"] = get_pdf_text(file)
|
179 |
+
else:
|
180 |
+
st.session_state["rfp_details"] = extract_text_from_file(file)
|
181 |
+
# loader = PdfReader(file)
|
182 |
+
# for i, page in enumerate(loader.pages):
|
183 |
+
# content = page.extract_text()
|
184 |
+
# if content:
|
185 |
+
# temp = st.session_state["rfp_details"]
|
186 |
+
# st.session_state["rfp_details"] = temp + content
|
187 |
|
188 |
text_splitter = CharacterTextSplitter(
|
189 |
separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
|