from dotenv import load_dotenv
import os
import json
from textwrap import dedent

import streamlit as st
from PyPDF2 import PdfReader
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings

from Prompts_and_Chains import LLMChains
from Templates import json_structure
from Utils import estimate_to_value, is_key_value_present


def extract_text_from_file(file):
    """Fallback extractor for plain-text uploads."""
    return file.read().decode("utf-8")


def process_paragraph(paragraph):
    """Return the raw text of a docx paragraph."""
    return paragraph.text


def process_table(table):
    """Flatten a docx table: cells joined by spaces, rows by newlines,
    so words from adjacent cells do not run together."""
    rows = []
    for row in table.rows:
        rows.append(" ".join(cell.text for cell in row.cells))
    return "\n".join(rows)


def read_docx(file):
    """Extract paragraphs and tables from a .docx upload in document order."""
    doc = Document(file)
    data = []
    for element in doc.iter_inner_content():
        if isinstance(element, Paragraph):
            data.append(process_paragraph(element))
        elif isinstance(element, Table):
            data.append(process_table(element))
    return "\n".join(data)


def get_pdf_text(pdf):
    """Extract the text from every page of a PDF file."""
    text = []
    pdf_reader = PdfReader(pdf)
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages.
        text.append(page.extract_text() or "")
    return "\n".join(text)


class RFPProcessor:
    def __init__(self):
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.chains_obj = LLMChains()

    def generate_roadmap(self):
        """Turn the estimation JSON into phase/milestone/feature roadmap rows."""
        roadmap_data = self.chains_obj.roadmap_chain.run(
            {"project_input": st.session_state["estimation_data_json"]}
        )
        st.session_state["roadmap_data_json"] = roadmap_data
        try:
            data = json.loads(roadmap_data)
        except json.JSONDecodeError as e:
            print(f"JSON decoding error: {e}")
            return
        for phase_data in data["phases"]:
            phase = phase_data["name"]
            for milestone in phase_data["milestones"]:
                milestone_name = milestone["name"]
                total_time = milestone["totalTime"]
                for feature in milestone["features"]:
                    feature_name = feature["name"]
                    feature_rationale = feature["rationale"]
                    feature_effort = feature["effort"]
                    # Leave phase/milestone cells blank when that value has
                    # already been emitted, so repeated labels do not clutter
                    # the rendered table.
                    phase_key_present = is_key_value_present(
                        st.session_state["roadmap_data"], "phases", phase
                    )
                    if phase_key_present:
                        milestone_key_present = is_key_value_present(
                            st.session_state["roadmap_data"],
                            "milestones",
                            milestone_name,
                        )
                        if milestone_key_present:
                            st.session_state.roadmap_data.append(
                                {
                                    "phases": "",
                                    "milestones": "",
                                    "total_time": "",
                                    "features": feature_name,
                                    "rational": feature_rationale,
                                    "effort": feature_effort,
                                }
                            )
                        else:
                            st.session_state.roadmap_data.append(
                                {
                                    "phases": "",
                                    "milestones": milestone_name,
                                    "total_time": total_time,
                                    "features": feature_name,
                                    "rational": feature_rationale,
                                    "effort": feature_effort,
                                }
                            )
                    else:
                        st.session_state.roadmap_data.append(
                            {
                                "phases": phase,
                                "milestones": milestone_name,
                                "total_time": total_time,
                                "features": feature_name,
                                "rational": feature_rationale,
                                "effort": feature_effort,
                            }
                        )
        st.session_state["is_roadmap_data_created"] = True

    def generate_estimations(self, tech_leads, senior_developers, junior_developers):
        """Run the estimation chain for the generated user stories and the
        given team composition, then flatten the result into table rows."""
        inputs = {
            "project_summary": st.session_state["rfp_summary"],
            "user_stories": st.session_state["user_stories_json"],
            "tech_leads": tech_leads,
            "senior_developers": senior_developers,
            "junior_developers": junior_developers,
        }
        data = self.chains_obj.estimations_chain.run(inputs)
        st.session_state["estimation_data_json"] = data
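        # Assumption, inferred from the traversal below: the chain returns a
        # JSON string shaped like
        # {"epics": [{"name": ..., "features": [{"name": ..., "stories": [...]}]}]}.
        # If the model strays from that schema, json.loads will raise.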
        estimation_json_data = json.loads(data)
        for epic_data in estimation_json_data["epics"]:
            epic = epic_data["name"]
            for feature_data in epic_data["features"]:
                feature = feature_data["name"]
                for story in feature_data["stories"]:
                    # Reduce the estimate string to a single person-days figure.
                    average = estimate_to_value(story["estimate"])
                    st.session_state.estimation_data.append(
                        {
                            "epic": epic,
                            "Feature": feature,
                            "Story Description": story["description"],
                            "Estimate": story["estimate"],
                            "Person Days Range": story["effort"],
                            "Story Rationale": story["rationale"],
                            "Person Days": average,
                        }
                    )
        st.session_state["is_estimation_data_created"] = True

    def process_rfp_data(self, project_name, files):
        """Extract text from the uploaded RFP files, index it in a vector
        store, and generate a project summary."""
        if project_name and files:
            extracted_data = []
            for file in files:
                if file.name.endswith(".docx"):
                    extracted_data.append(read_docx(file))
                elif file.name.endswith(".pdf"):
                    extracted_data.append(get_pdf_text(file))
                else:
                    extracted_data.append(extract_text_from_file(file))
            st.session_state["rfp_details"] = " ".join(extracted_data)
            text_splitter = CharacterTextSplitter(
                separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
            )
            texts = text_splitter.split_text(st.session_state["rfp_details"])
            st.session_state["vectorstore"] = Chroma.from_texts(
                texts, embedding=OpenAIEmbeddings(openai_api_key=self.openai_api_key)
            )
            st.session_state.project_name = project_name
            st.session_state["rfp_summary"] = self.chains_obj.summary_chain.run(
                {
                    "project_name": st.session_state["project_name"],
                    "rfp_details": dedent(st.session_state["rfp_details"]),
                }
            )
            st.session_state["is_data_processed"] = True
            st.success("Data processed successfully")

    def genrate_bot_result(self):
        """Answer a chat question using the most similar chunk from the vector store."""
        if len(st.session_state["input"]) > 0:
            db = st.session_state["vectorstore"]
            context = db.similarity_search(st.session_state["input"])
            inputs = {
                "context": context[0].page_content,
                "input": st.session_state["input"],
            }
            output = self.chains_obj.bot_chain.run(inputs)
            st.session_state.past.append(st.session_state["input"])
            st.session_state.generated.append(output)
            st.session_state["input"] = ""

    def genrate_user_stories(self):
        """Generate user stories from the RFP text, convert them to JSON, and
        flatten them into table rows."""
        output = self.chains_obj.user_story_chain.run(
            {
                "project_name": st.session_state["project_name"],
                "rfp_details": st.session_state["rfp_details"],
            }
        )
        st.session_state["user_stories"] = output
        # A second chain restructures the free-text stories into the JSON
        # schema defined by Templates.json_structure.
        json_response = self.chains_obj.json_chain.run(
            {
                "user_stories": st.session_state["user_stories"],
                "json_structure": json_structure,
            }
        )
        user_stories_data = json.loads(json_response)
        st.session_state["user_stories_json"] = user_stories_data
        for epic_data in user_stories_data["epics"]:
            epic = epic_data["name"]
            for feature_data in epic_data["features"]:
                feature = feature_data["name"]
                for story in feature_data["stories"]:
                    st.session_state.user_stories_data.append(
                        {
                            "epic": epic,
                            "Feature": feature,
                            "Story Description": story["description"],
                        }
                    )
        st.session_state["is_user_stories_created"] = True
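
# Illustrative usage sketch (not part of the module): the methods above assume
# a Streamlit page that has already initialised the session-state keys they
# read and append to ("rfp_summary", "user_stories_data", "estimation_data",
# "roadmap_data", "past", "generated", ...). "uploaded_files" below is a
# hypothetical stand-in for the return value of st.file_uploader on that page.
#
#   processor = RFPProcessor()
#   processor.process_rfp_data("Example RFP", uploaded_files)
#   processor.genrate_user_stories()
#   processor.generate_estimations(tech_leads=1, senior_developers=2, junior_developers=2)
#   processor.generate_roadmap()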