# rfp_to_story / Functions.py
# (Hugging Face file-page chrome — uploader Darpan07, commit e12e334 — converted
# to a comment header so the module is importable.)
from dotenv import load_dotenv
import os
from PyPDF2 import PdfReader
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
import streamlit as st
from textwrap import dedent
from Prompts_and_Chains import LLMChains
from Templates import json_structure
import json
from Utils import estimate_to_value
from Utils import is_key_value_present
def extract_text_from_file(file):
    """Read an uploaded file object and decode its raw bytes as UTF-8 text."""
    raw_bytes = file.read()
    return raw_bytes.decode("utf-8")
def process_paragraph(paragraph):
    """Return the visible text of a docx paragraph element."""
    return paragraph.text
def process_table(table):
    """Concatenate the text of every cell in *table*, row by row.

    Note: cells are joined with no separator, matching the original behavior.
    """
    return "".join(cell.text for row in table.rows for cell in row.cells)
def read_docx(file_path):
    """Extract all text from a .docx file, preserving document order.

    Walks paragraphs and tables interleaved (via ``iter_inner_content``) and
    joins each element's text with newlines.
    """
    doc = Document(file_path)
    chunks = [
        process_paragraph(el) if isinstance(el, Paragraph) else process_table(el)
        for el in doc.iter_inner_content()
        if isinstance(el, (Paragraph, Table))
    ]
    return "\n".join(chunks)
def get_pdf_text(pdf):
    """Extract the text of every page of a PDF, joined by newlines."""
    reader = PdfReader(pdf)
    return "\n".join(page.extract_text() for page in reader.pages)
class RFPProcessor:
    """Orchestrates the RFP pipeline: ingest a document, summarize it,
    generate user stories, estimations, and a delivery roadmap.

    All intermediate results are stored in ``st.session_state``; the LLM
    calls are delegated to :class:`LLMChains`.
    """

    def __init__(self):
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.chains_obj = LLMChains()

    def generate_roadmap(self):
        """Run the roadmap chain and flatten its JSON into table rows.

        Appends one row per feature to ``st.session_state.roadmap_data``.
        Phase/milestone labels appear only on their first row so the table
        reads as grouped. Sets ``is_roadmap_data_created`` on success.
        """
        roadmap_data = self.chains_obj.roadmap_chain.run(
            {"project_input": st.session_state["estimation_data_json"]}
        )
        st.session_state["roadmap_data_json"] = roadmap_data
        # Parse once, inside the guard. (The original parsed before its
        # try-block, so the JSONDecodeError handler could never fire.)
        try:
            data = json.loads(roadmap_data)
        except json.JSONDecodeError as e:
            print(f"JSON decoding error: {e}")
            return
        for phase_data in data["phases"]:
            phase_name = phase_data["name"]
            for milestone in phase_data["milestones"]:
                milestone_name = milestone["name"]
                total_time = milestone["totalTime"]
                for feature in milestone["features"]:
                    # NOTE: "rational" (sic) is the key downstream consumers
                    # read — kept as-is.
                    row = {
                        "phases": "",
                        "milestones": "",
                        "total_time": "",
                        "features": feature["name"],
                        "rational": feature["rationale"],
                        "effort": feature["effort"],
                    }
                    # Only the first row of a new phase/milestone carries its
                    # label (and the milestone's total time).
                    if not is_key_value_present(
                        st.session_state["roadmap_data"], "phases", phase_name
                    ):
                        row["phases"] = phase_name
                        row["milestones"] = milestone_name
                        row["total_time"] = total_time
                    elif not is_key_value_present(
                        st.session_state["roadmap_data"],
                        "milestones",
                        milestone_name,
                    ):
                        row["milestones"] = milestone_name
                        row["total_time"] = total_time
                    st.session_state.roadmap_data.append(row)
        st.session_state["is_roadmap_data_created"] = True

    def generate_estimations(self, tech_leads, senior_developers, junior_developers):
        """Run the estimation chain and flatten epics/features/stories.

        Args:
            tech_leads: headcount of tech leads to estimate with.
            senior_developers: headcount of senior developers.
            junior_developers: headcount of junior developers.

        Appends one row per story to ``st.session_state.estimation_data``
        and sets ``is_estimation_data_created`` on success.
        """
        inputs = {
            "project_summary": st.session_state["rfp_summary"],
            "user_stories": st.session_state["user_stories_json"],
            "tech_leads": tech_leads,
            "senior_developers": senior_developers,
            "junior_developers": junior_developers,
        }
        data = self.chains_obj.estimations_chain.run(inputs)
        st.session_state["estimation_data_json"] = data
        # Guard the parse of LLM output, consistent with generate_roadmap.
        try:
            estimation_json_data = json.loads(data)
        except json.JSONDecodeError as e:
            print(f"JSON decoding error: {e}")
            return
        for epic_data in estimation_json_data["epics"]:
            epic = epic_data["name"]
            for feature_data in epic_data["features"]:
                feature = feature_data["name"]
                for story in feature_data["stories"]:
                    # Convert the textual estimate (e.g. a range) to a number.
                    average = estimate_to_value(story["estimate"])
                    st.session_state.estimation_data.append(
                        {
                            "epic": epic,
                            "Feature": feature,
                            "Story Description": story["description"],
                            "Estimate": story["estimate"],
                            "Person Days Range": story["effort"],
                            "Story Rationale": story["rationale"],
                            "Person Days": average,
                        }
                    )
        st.session_state["is_estimation_data_created"] = True

    def process_rfp_data(self, project_name, file):
        """Ingest an uploaded RFP, index it, and summarize it.

        Extracts text (.docx / .pdf / plain text by extension), chunks and
        embeds it into a Chroma vector store, and stores an LLM summary.
        No-ops if either argument is missing.
        """
        if not (project_name and file):
            return
        if file.name.endswith(".docx"):
            st.session_state["rfp_details"] = read_docx(file)
        elif file.name.endswith(".pdf"):
            st.session_state["rfp_details"] = get_pdf_text(file)
        else:
            # Fallback: treat anything else as UTF-8 plain text.
            st.session_state["rfp_details"] = extract_text_from_file(file)
        text_splitter = CharacterTextSplitter(
            separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
        )
        texts = text_splitter.split_text(st.session_state["rfp_details"])
        # from_texts is a classmethod — no throwaway Chroma() instance needed.
        st.session_state["vectorstore"] = Chroma.from_texts(
            texts, embedding=OpenAIEmbeddings(openai_api_key=self.openai_api_key)
        )
        st.session_state.project_name = project_name
        st.session_state["rfp_summary"] = self.chains_obj.summary_chain.run(
            {
                "project_name": st.session_state["project_name"],
                "rfp_details": dedent(st.session_state["rfp_details"]),
            }
        )
        st.session_state["is_data_processed"] = True
        st.success("Data processed successfully")

    def genrate_bot_result(self):
        """Answer the current chat input via similarity search + LLM.

        (Method name typo kept for caller compatibility.) Reads the query
        from ``st.session_state["input"]``, grounds it on the best-matching
        chunk from the vector store, and appends the exchange to the chat
        history before clearing the input box.
        """
        if not st.session_state["input"]:
            return
        db = st.session_state["vectorstore"]
        context = db.similarity_search(st.session_state["input"])
        inputs = {
            "context": context[0].page_content,
            "input": st.session_state["input"],
        }
        output = self.chains_obj.bot_chain.run(inputs)
        st.session_state.past.append(st.session_state["input"])
        st.session_state.generated.append(output)
        st.session_state["input"] = ""

    def genrate_user_stories(self):
        """Generate user stories from the RFP and flatten them into rows.

        (Method name typo kept for caller compatibility.) Runs the story
        chain, converts its output to JSON via the json chain, and appends
        one row per story to ``st.session_state.user_stories_data``. Sets
        ``is_user_stories_created`` on success.
        """
        output = self.chains_obj.user_story_chain.run(
            {
                "project_name": st.session_state["project_name"],
                "rfp_details": st.session_state["rfp_details"],
            }
        )
        st.session_state["user_stories"] = output
        json_response = self.chains_obj.json_chain.run(
            {
                "user_stories": st.session_state["user_stories"],
                "json_structure": json_structure,
            }
        )
        # Guard the parse of LLM output, consistent with generate_roadmap.
        try:
            user_stories_data = json.loads(json_response)
        except json.JSONDecodeError as e:
            print(f"JSON decoding error: {e}")
            return
        st.session_state["user_stories_json"] = user_stories_data
        for epic_data in user_stories_data["epics"]:
            epic = epic_data["name"]
            for feature_data in epic_data["features"]:
                feature = feature_data["name"]
                for story in feature_data["stories"]:
                    st.session_state.user_stories_data.append(
                        {
                            "epic": epic,
                            "Feature": feature,
                            "Story Description": story["description"],
                        }
                    )
        st.session_state["is_user_stories_created"] = True