Darpan07 committed on
Commit
e12e334
1 Parent(s): 17dae65

Update Functions.py

Browse files
Files changed (1) hide show
  1. Functions.py +124 -36
Functions.py CHANGED
@@ -1,6 +1,9 @@
1
  from dotenv import load_dotenv
2
  import os
3
  from PyPDF2 import PdfReader
 
 
 
4
  from langchain.text_splitter import CharacterTextSplitter
5
  from langchain_community.vectorstores import Chroma
6
  from langchain_community.embeddings import OpenAIEmbeddings
@@ -12,6 +15,50 @@ import json
12
  from Utils import estimate_to_value
13
  from Utils import is_key_value_present
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  class RFPProcessor:
16
  def __init__(self):
17
  load_dotenv()
@@ -19,38 +66,73 @@ class RFPProcessor:
19
  self.chains_obj = LLMChains()
20
 
21
  def generate_roadmap(self):
22
-
23
- roadmap_data = self.chains_obj.roadmap_chain.run({"project_input":st.session_state["estimation_data_json"]})
 
 
24
  print(roadmap_data)
25
  st.session_state["roadmap_data_json"] = roadmap_data
26
  data = json.loads(roadmap_data)
27
-
28
  try:
29
- decoded_data = json.loads(roadmap_data)
30
- print(decoded_data)
31
  except json.decoder.JSONDecodeError as e:
32
- print(f"JSON decoding error: {e}")
33
-
34
- for phases_data in data['phases']:
35
- phase = phases_data["name"]
36
- for milestone in phases_data['milestones']:
37
- milestone_name = milestone["name"]
38
- total_time = milestone["totalTime"]
39
- for feature in milestone["features"]:
40
- featue_name = feature["name"]
41
- featue_rationale = feature["rationale"]
42
- featue_effort = feature["effort"]
43
- phase_key_present = is_key_value_present(st.session_state["roadmap_data"], "phases", phase)
44
-
45
- if phase_key_present:
46
- milestone_key_present = is_key_value_present(st.session_state["roadmap_data"], "milestones", milestone_name)
47
- if milestone_key_present:
48
- st.session_state.roadmap_data.append({"phases": "","milestones": "","total_time": "","features": featue_name,"rational": featue_rationale,"effort": featue_effort,})
49
- else:
50
- st.session_state.roadmap_data.append({"phases": "","milestones": milestone_name,"total_time": total_time,"features": featue_name,"rational": featue_rationale,"effort": featue_effort,})
51
- else:
52
- st.session_state.roadmap_data.append({"phases": phase,"milestones": milestone_name,"total_time": total_time,"features": featue_name,"rational": featue_rationale,"effort": featue_effort,})
53
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  st.session_state["is_roadmap_data_created"] = True
55
 
56
  def generate_estimations(self, tech_leads, senior_developers, junior_developers):
@@ -64,9 +146,9 @@ class RFPProcessor:
64
  }
65
 
66
  data = self.chains_obj.estimations_chain.run(inputs)
67
-
68
- st.session_state["estimation_data_json"]= data
69
-
70
  estimation_json_data = json.loads(data)
71
 
72
  for epic_data in estimation_json_data["epics"]:
@@ -90,12 +172,18 @@ class RFPProcessor:
90
 
91
  def process_rfp_data(self, project_name, file):
92
  if project_name and file:
93
- loader = PdfReader(file)
94
- for i, page in enumerate(loader.pages):
95
- content = page.extract_text()
96
- if content:
97
- temp = st.session_state["rfp_details"]
98
- st.session_state["rfp_details"] = temp + content
 
 
 
 
 
 
99
 
100
  text_splitter = CharacterTextSplitter(
101
  separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
 
1
  from dotenv import load_dotenv
2
  import os
3
  from PyPDF2 import PdfReader
4
+ from docx import Document
5
+ from docx.text.paragraph import Paragraph
6
+ from docx.table import Table
7
  from langchain.text_splitter import CharacterTextSplitter
8
  from langchain_community.vectorstores import Chroma
9
  from langchain_community.embeddings import OpenAIEmbeddings
 
15
  from Utils import estimate_to_value
16
  from Utils import is_key_value_present
17
 
18
+
19
def extract_text_from_file(file):
    """Return the entire content of a plain-text upload as a string.

    Args:
        file: File-like object exposing ``read()``. It may yield ``bytes``
            (binary upload) or ``str`` (handle opened in text mode).

    Returns:
        The decoded text. Bytes are decoded as UTF-8; a leading byte-order
        mark is dropped and undecodable bytes become U+FFFD instead of
        raising ``UnicodeDecodeError``.
    """
    raw = file.read()
    if isinstance(raw, str):
        # Text-mode handles already return str, which has no .decode().
        return raw
    # "utf-8-sig" decodes plain UTF-8 too, and additionally strips a BOM so it
    # does not leak into the first chunk of text.
    return raw.decode("utf-8-sig", errors="replace")
22
+
23
+
24
def process_paragraph(paragraph):
    """Return the text of ``paragraph`` (its ``text`` attribute), unchanged."""
    paragraph_text = paragraph.text
    return paragraph_text
27
+
28
+
29
def process_table(table):
    """Flatten a table into plain text, one line per row.

    Args:
        table: Object exposing ``rows``; each row exposes ``cells`` and each
            cell exposes ``text`` (``read_docx`` passes ``docx.table.Table``
            instances).

    Returns:
        The cell texts of each row joined by a tab, rows joined by a newline.
        An empty table yields ``""``.
    """
    # Delimiters keep neighbouring cells from fusing into one token
    # ("Name" + "Age" -> "NameAge"), which would corrupt the extracted text.
    row_lines = (
        "\t".join(cell.text for cell in row.cells) for row in table.rows
    )
    return "\n".join(row_lines)
37
+
38
+
39
def read_docx(file_path):
    """Extract the text of a .docx document as one newline-joined string.

    Args:
        file_path: Whatever ``docx.Document`` accepts to open the document.

    Returns:
        The text of each paragraph and table, in the order
        ``iter_inner_content()`` yields them, joined with ``"\\n"``.
    """

    def _rendered_blocks():
        # Paragraphs and tables are rendered by their dedicated helpers;
        # any other element type is skipped.
        for block in Document(file_path).iter_inner_content():
            if isinstance(block, Paragraph):
                yield process_paragraph(block)
            if isinstance(block, Table):
                yield process_table(block)

    return "\n".join(_rendered_blocks())
50
+
51
+
52
def get_pdf_text(pdf):
    """Extract the text of every page of a PDF.

    Args:
        pdf: Source handed straight to ``PdfReader``.

    Returns:
        The ``extract_text()`` result of each page, joined with ``"\\n"``.
    """
    reader = PdfReader(pdf)
    page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(page_texts)
60
+
61
+
62
  class RFPProcessor:
63
  def __init__(self):
64
  load_dotenv()
 
66
  self.chains_obj = LLMChains()
67
 
68
  def generate_roadmap(self):
69
+
70
+ roadmap_data = self.chains_obj.roadmap_chain.run(
71
+ {"project_input": st.session_state["estimation_data_json"]}
72
+ )
73
  print(roadmap_data)
74
  st.session_state["roadmap_data_json"] = roadmap_data
75
  data = json.loads(roadmap_data)
76
+
77
  try:
78
+ decoded_data = json.loads(roadmap_data)
79
+ print(decoded_data)
80
  except json.decoder.JSONDecodeError as e:
81
+ print(f"JSON decoding error: {e}")
82
+
83
+ for phases_data in data["phases"]:
84
+ phase = phases_data["name"]
85
+ for milestone in phases_data["milestones"]:
86
+ milestone_name = milestone["name"]
87
+ total_time = milestone["totalTime"]
88
+ for feature in milestone["features"]:
89
+ featue_name = feature["name"]
90
+ featue_rationale = feature["rationale"]
91
+ featue_effort = feature["effort"]
92
+ phase_key_present = is_key_value_present(
93
+ st.session_state["roadmap_data"], "phases", phase
94
+ )
95
+
96
+ if phase_key_present:
97
+ milestone_key_present = is_key_value_present(
98
+ st.session_state["roadmap_data"],
99
+ "milestones",
100
+ milestone_name,
101
+ )
102
+ if milestone_key_present:
103
+ st.session_state.roadmap_data.append(
104
+ {
105
+ "phases": "",
106
+ "milestones": "",
107
+ "total_time": "",
108
+ "features": featue_name,
109
+ "rational": featue_rationale,
110
+ "effort": featue_effort,
111
+ }
112
+ )
113
+ else:
114
+ st.session_state.roadmap_data.append(
115
+ {
116
+ "phases": "",
117
+ "milestones": milestone_name,
118
+ "total_time": total_time,
119
+ "features": featue_name,
120
+ "rational": featue_rationale,
121
+ "effort": featue_effort,
122
+ }
123
+ )
124
+ else:
125
+ st.session_state.roadmap_data.append(
126
+ {
127
+ "phases": phase,
128
+ "milestones": milestone_name,
129
+ "total_time": total_time,
130
+ "features": featue_name,
131
+ "rational": featue_rationale,
132
+ "effort": featue_effort,
133
+ }
134
+ )
135
+
136
  st.session_state["is_roadmap_data_created"] = True
137
 
138
  def generate_estimations(self, tech_leads, senior_developers, junior_developers):
 
146
  }
147
 
148
  data = self.chains_obj.estimations_chain.run(inputs)
149
+
150
+ st.session_state["estimation_data_json"] = data
151
+
152
  estimation_json_data = json.loads(data)
153
 
154
  for epic_data in estimation_json_data["epics"]:
 
172
 
173
  def process_rfp_data(self, project_name, file):
174
  if project_name and file:
175
+ if file.name.endswith(".docx"):
176
+ st.session_state["rfp_details"] = read_docx(file)
177
+ elif file.name.endswith(".pdf"):
178
+ st.session_state["rfp_details"] = get_pdf_text(file)
179
+ else:
180
+ st.session_state["rfp_details"] = extract_text_from_file(file)
181
+ # loader = PdfReader(file)
182
+ # for i, page in enumerate(loader.pages):
183
+ # content = page.extract_text()
184
+ # if content:
185
+ # temp = st.session_state["rfp_details"]
186
+ # st.session_state["rfp_details"] = temp + content
187
 
188
  text_splitter = CharacterTextSplitter(
189
  separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len