Spaces:

nmurugesh
/

Chat-Analyze-NSE-Corporate-Announcements-MultipleModels-v2

Runtime error

App Files Files Community

nmurugesh committed on Jul 20, 2024

Commit

b66ee3d

verified ·

1 Parent(s): 7f8e771

Upload 3 files

Browse files

Files changed (3) hide show

app.py +1019 -0
ind_nifty50list.csv +51 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,1019 @@

+# !pip install langchain langchain-groq sentence-transformers langchainhub faiss-cpu gradio gradio_client yfinance duckduckgo-search
+import pandas as pd
+import io
+import requests
+import os
+import json
+import matplotlib.pyplot as plt
+from datetime import datetime, timedelta
+import requests
+from bs4 import BeautifulSoup
+import requests
+import yfinance as yf
+import ast
+import re
+from datetime import datetime, timedelta
+import pytz
+# import langchain libraries
+# !pip install langchain langchain-groq langchainhub  duckduckgo-search
+from langchain.agents import AgentExecutor
+from langchain.agents import create_react_agent
+from langchain.agents import create_structured_chat_agent
+from langchain import hub
+from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.agents import Tool
+from langchain_community.tools import DuckDuckGoSearchResults
+from langchain.schema.output_parser import StrOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_community.tools import DuckDuckGoSearchRun
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import create_retrieval_chain
+from langchain import hub
+from langchain.chains import RetrievalQA
+from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain_community.document_loaders.csv_loader import CSVLoader
+from langchain.tools import DuckDuckGoSearchRun
+from langchain_core.output_parsers import JsonOutputParser
+from langchain.agents import AgentExecutor, create_tool_calling_agent
+from langchain_core.prompts import ChatPromptTemplate
+#import gradio libraries
+# !pip install  gradio gradio_client
+import gradio as gr
+#import vectorstore libraries
+# !pip install faiss-cpu
+from langchain_community.vectorstores import FAISS
+# Sentence-transformer embedding model shared by every FAISS store built or loaded in this file.
+embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+############################################
+############################################
+# # Code steps involved:
+# 1. Define the LLM
+# 2. Extract data from NSE
+# 3. Process the dataframe and store it as CSV files
+# 4. Use Langchain CSV Loaders to load the CSV data
+# 5. Create Vector Stores
+# 6. Create company lists
+# 7. Create the LLM functions required
+# 8. Create the python functions for stock data and charting functions
+# 9. Create Gradio Blocks
+# 10. Find any recent real time addition to NSE data and add it to the vector stores.
+# 11. Create retrievers and langchain QA retrieval chains
+# 12. Define charts for default
+# 13. Gradio app
+##########################################
+##########################################
+# Define the LLM - We shall use ChatGroq of Groq Platform and LLama70B
+# This llm definition is redundant as now models will be chosen by user
+# NOTE: an API key was previously committed on this line. It must be treated as leaked and revoked;
+# keys should be read from the environment (e.g. a Space secret), never hard-coded.
+# llm = ChatGroq(
+#             api_key=os.environ["GROQ_API_KEY"],
+#             model="llama3-70b-8192",
+#             # model = 'gemma-7b-it',
+#             temperature = 0
+#             # model = 'mixtral-8x7B-32768'
+#         )
+# Get the data from NSE as pandas dataframe
+# Function to get dataframe from NSE website
+# Data from two pages: NSE Announcements and NSE corporate actions are fetched and hence two dataframes
+def get_pd(d):
+    """Download NSE corporate announcements and corporate actions as dataframes.
+
+    Args:
+        d: Number of days to look back; announcements are requested for the
+            window from ``today - d`` up to today.
+
+    Returns:
+        A ``(df, dfca)`` tuple: the announcements dataframe and the corporate
+        actions dataframe, both parsed from the CSV bodies NSE returns.
+
+    Raises:
+        requests.RequestException: If NSE cannot be reached, times out, or
+            answers with an HTTP error status.
+    """
+    current_date = datetime.now()
+    previous_day = current_date - timedelta(days=d)
+    # The query string carries dates formatted as dd-mm-yyyy.
+    current_date_str = current_date.strftime("%d-%m-%Y")
+    previous_day_str = previous_day.strftime("%d-%m-%Y")
+    base_url = 'https://www.nseindia.com'
+    headers = {
+        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
+                      'like Gecko) '
+                      'Chrome/80.0.3987.149 Safari/537.36',
+        'accept-language': 'en,gu;q=0.9,hi;q=0.8',
+        'accept-encoding': 'gzip, deflate, br'}
+    url1 = f"https://www.nseindia.com/api/corporate-announcements?index=equities&from_date={previous_day_str}&to_date={current_date_str}&csv=true"
+    url2 = "https://www.nseindia.com/api/corporates-corporateActions?index=equities&csv=true"
+    # Close the session when done so pooled connections are not leaked on every refresh.
+    with requests.Session() as session:
+        # Load the home page first and forward its cookies to the API calls.
+        r = session.get(base_url, headers=headers, timeout=120)
+        cookies = dict(r.cookies)
+        response1 = session.get(url1, timeout=120, headers=headers, cookies=cookies)
+        response2 = session.get(url2, timeout=120, headers=headers, cookies=cookies)
+    # Fail loudly on 4xx/5xx instead of handing an error page to the CSV parser.
+    response1.raise_for_status()
+    response2.raise_for_status()
+    df = pd.read_csv(io.StringIO(response1.content.decode('utf-8')))
+    dfca = pd.read_csv(io.StringIO(response2.content.decode('utf-8')))
+    return df, dfca
+# Process the dataframe and store it as CSV files
+# To increase the speed of processing in RAG, three separate vector stores are used:
+# the first holds all announcement columns, the second the same rows without the ATTACHMENT
+# column, and the third the corporate-action (CA) data.
+# Owing to the context window problem of RAG, it is always good to ensure that we don't have any irrelevant data
+# NOTE: everything below runs at import time and performs network and file I/O.
+df_old, dfca = get_pd(1)
+# Keep an untouched copy on disk; incremental_process() falls back to it when NSE cannot be fetched.
+df_back = df_old.copy()
+df_back.to_csv("df_backup.csv",index=False)
+df_old.drop(['RECEIPT','DISSEMINATION','DIFFERENCE'],axis=1,inplace=True)
+df_old2 = df_old.drop(['ATTACHMENT'],axis=1)
+# Save it as a CSV file
+df_old.to_csv("nse_data_old.csv", index=False)
+# df_old1.to_csv("nse_data_old1.csv", index=False)
+df_old2.to_csv("nse_data_old2.csv", index=False)
+dfca.to_csv("nse_ca.csv", index=False)
+# Use Langchain CSV Loaders to load the CSV data
+loader = CSVLoader("nse_data_old.csv")
+data_old = loader.load()
+loader2 = CSVLoader("nse_data_old2.csv")
+data_old_2 = loader2.load()
+loader3 = CSVLoader("nse_ca.csv")
+data_ca = loader3.load()
+# NOTE(review): a `global` statement at module level has no effect; `colist` and
+# `colist_tracked` are not assigned anywhere in the visible code (the lists below are
+# named co_list / co_list_tracked).
+global vectorstore,vectorstore2,vectorstore3, colist, colist_tracked
+# Create vectorstores - I tried Chroma but FAISS turned out to be successful
+vectorstore = FAISS.from_documents(data_old, embedding_function)
+vectorstore2 = FAISS.from_documents(data_old_2, embedding_function)
+vectorstore3 = FAISS.from_documents(data_ca, embedding_function)
+# Persist the stores so incremental_process() can reload and extend them from disk.
+vectorstore.save_local("vectorstore")
+vectorstore2.save_local("vectorstore2")
+vectorstore3.save_local("vectorstore3")
+###########################
+# Create company list
+# NIFTY 50 constituents are read from a bundled CSV - hardcoded because the index changes
+# rarely, but this can be made dynamic.
+co1 = pd.read_csv('ind_nifty50list.csv')
+column_name = "Company Name"
+co_list1 = co1[column_name].tolist()
+# Companies tracked in addition to the NIFTY 50 - this can be uploaded / hardcoded.
+# Names are compared by exact match against the NSE 'COMPANY NAME' column (see get_colist2),
+# so spelling matters. Both "Ltd" and "Limited" variants are kept for the two banks.
+co_list_tracked = [
+    'Reliance Industries Limited',
+    'Infosys Limited',
+    'ICICI Bank Ltd',
+    'Indusind Bank Ltd',
+    'Ramco Systems',
+    'Zydus Lifesciences Limited',
+    'Bharti Airtel Limited',
+    'ICICI Bank Limited',
+    'TechMahindra Limited',
+    'Indiabulls Real Estate Limited',
+    'Tamilnad Mercantile Bank Limited',
+    'Bajaj Finance Limited',
+    'Apollo Tyres Limited',
+    'Indusind Bank Limited',
+    'Kirloskar Oil Engines Limited',
+]
+co_list = co_list1 + co_list_tracked
+####################################
+##################################
+# Let us create some functions required
+##################################
+# LLM function to get announcement detail
+def give_announcement(llm,stock):
+    """Ask the LLM, via retrieval over the main announcement store, what a company announced.
+
+    Args:
+        llm: Chat model used by the RetrievalQA chain.
+        stock: Matched company name(s) as returned by ``get_companynames``;
+            a falsy value means nothing matched.
+
+    Returns:
+        A sentence prefixed with "Announcements made by ...", or a fixed
+        message when ``stock`` is empty.
+    """
+    if not stock:
+        return "This company has not made any announcements today or yesterday"
+    qa_chain = RetrievalQA.from_chain_type(llm,
+                                           retriever=vectorstore.as_retriever(),
+                                           return_source_documents=False)
+    query = (
+        f"What are the announcements made by the company {stock}?. "
+        "If no announcement has been made by that company, "
+        "just say that no announcement has been made by that company."
+    )
+    # Calling the chain object directly is deprecated in LangChain; .invoke matches the
+    # agent_executor.invoke calls used elsewhere in this file.
+    response = qa_chain.invoke({"query": query})
+    return f"Announcements made by {stock}: {response['result']}"
+# LLM function to get Corporate Action Detail
+def get_ca(llm,stock):
+    """Ask the LLM, via retrieval over the corporate-actions store, about a company's actions.
+
+    Args:
+        llm: Chat model used by the RetrievalQA chain.
+        stock: Matched company name(s) as returned by ``get_companynames``;
+            a falsy value means nothing matched.
+
+    Returns:
+        The chain's answer text, or a fixed message when ``stock`` is empty.
+    """
+    if not stock:
+        return "This company has not made any announcements today or yesterday"
+    qa_chain2 = RetrievalQA.from_chain_type(llm,
+                                            retriever=vectorstore3.as_retriever(),
+                                            return_source_documents=False)
+    query = (
+        f"What are the corporate action announcements made by the company {stock}?. "
+        "If no announcement has been made by that company, do not print any source documents and "
+        "just say that no announcement has been made by that company."
+    )
+    # Calling the chain object directly is deprecated in LangChain; use .invoke instead.
+    response = qa_chain2.invoke({"query": query})
+    return response['result']
+# Web search tool; its .run method is passed to the "DuckDuckGo Search" agent tool
+# in get_movements and get_sentiments.
+search=DuckDuckGoSearchRun()
+# Fetch stock data from Yahoo Finance
+def get_stock_price(ticker,history=5):
+    """Return recent closing prices and volumes for an NSE-listed stock as text.
+
+    Args:
+        ticker: Stock symbol. Anything after the first "." is dropped and
+            ".NS" is appended before querying Yahoo Finance.
+        history: Number of most recent rows to keep.
+
+    Returns:
+        The string form of a Close/Volume table indexed by date, or a short
+        message when Yahoo Finance returns no rows for the symbol.
+    """
+    # This function is used as an agent tool, so the input is model-generated text that
+    # can carry stray whitespace or quotes.
+    ticker = str(ticker).strip().strip("'\"")
+    ticker = ticker.split(".")[0] + ".NS"
+    df = yf.Ticker(ticker).history(period="1y")
+    if df.empty:
+        # Selecting columns from an empty result would raise KeyError.
+        return f"No price data found for {ticker}"
+    df = df[["Close", "Volume"]]
+    # Keep only the date part of each timestamp in the index.
+    df.index = [str(x).split()[0] for x in list(df.index)]
+    df.index.rename("Date", inplace=True)
+    df = df[-history:]
+    return df.to_string()
+# get stock price movements
+def get_movements(llm,stock):
+    """Describe recent price movement of the first matched company using a tool-calling agent.
+
+    Args:
+        llm: Chat model that drives the agent.
+        stock: List of matched company names (from ``get_companynames``);
+            only the first entry is analysed.
+
+    Returns:
+        "Answer for <ticker> - <agent output>", a fixed message when no
+        company matched, or "An error occurred: ..." when the lookup or the
+        agent fails (same convention as ``get_sentiments``).
+    """
+    no_data = "This company has not made any announcements today or yesterday"
+    if not stock:
+        return no_data
+    company = stock[0]
+    dfc = pd.read_csv('nse_data_old.csv')
+    # Filter once and reuse the rows for both the display name and the symbol.
+    matches = dfc[dfc['COMPANY NAME'] == company]
+    if matches.empty:
+        return no_data
+    stockdesc = matches['COMPANY NAME'].iloc[0]
+    try:
+        stock = get_ticker(matches['SYMBOL'].iloc[0])
+        print("stock is ",stock)
+        tools = [
+            Tool(
+                name="get stock data",
+                func=get_stock_price,
+                description=(
+                    "Use this tool to get stock price data. This tool will return three values: "
+                    "date, volume and closing price of the stock "
+                    f"for the period of 5 days. stock = {stock}"
+                ),
+            ),
+            Tool(
+                name="DuckDuckGo Search",
+                func=search.run,
+                description=(
+                    "Use this tool for for web search for searching details about stock like "
+                    "broker sentiment. You can also get recent stock "
+                    f"related news. stock symbol = {stock} and stockname = {stockdesc}"
+                ),
+            ),
+        ]
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    "You are a helpful stock market analysis assistant. Make sure to use the tools given for information.",
+                ),
+                ("placeholder", "{chat_history}"),
+                ("human", "{input}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ]
+        )
+        agent = create_tool_calling_agent(llm, tools, prompt)
+        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+        question = (
+            f"How much the stock price of stock {stock} with name {stockdesc} moved in the last few days?. "
+            "Give the prices over the last few days and also percentage change. "
+            "For example, If the stock has not moved in single direction, "
+            "you can say the stock has been volatile. But if it has moved up over five days, "
+            "you can say so with percentage movement"
+        )
+        response = agent_executor.invoke({"input": question})
+        return f"Answer for {stock} - {response['output']}"
+    except Exception as e:
+        # Report the failure in this output box instead of failing the whole request.
+        return f"An error occurred: {str(e)}"
+  #####################################
+  # get stock sentiments
+  #####################################
+# System prompt for the broker-sentiment agent in get_sentiments.
+# Built from adjacent literals so no stray backslash or indentation ends up in the text.
+prompt1 = (
+    "Hello, I need broker sentiment data for a specific stock. "
+    "Please search and summarize current market analyses, broker reports, "
+    "and overall sentiment regarding the given stock. "
+    "Focus on information from credible sources like financial news, broker reports, "
+    "and investment research firms. "
+    "Provide key insights, including: "
+    "Recent broker recommendations (buy, hold, sell), "
+    "Notable broker analyses or reports, "
+    "General trends in broker sentiment, "
+    "Any major news or events impacting the stock's sentiment. "
+    "Please ensure the data is up-to-date and from reputable sources. "
+    "Provide a concise summary with relevant details and any supporting context "
+    "to understand the current sentiment. "
+    "Please note that you are not chat agent, but meant for single usage, "
+    "so do not conclude with any greetings or asking for further assistance etc!."
+)
+def get_sentiments(llm,stock):
+    """Summarise broker sentiment for the first matched company using a tool-calling agent.
+
+    Args:
+        llm: Chat model that drives the agent.
+        stock: List of matched company names (from ``get_companynames``);
+            only the first entry is analysed.
+
+    Returns:
+        "Broker sentiment analysis for <ticker>. - <agent output>", a fixed
+        message when no company matched, or "An error occurred: ..." when
+        the lookup or the agent fails.
+    """
+    if not stock:
+        return "This company has not made any announcements today or yesterday"
+    company = stock[0]
+    try:
+        dfc = pd.read_csv('nse_data_old.csv')
+        matches = dfc[dfc['COMPANY NAME'] == company]
+        stockdesc = matches['COMPANY NAME'].iloc[0]
+        stock = get_ticker(matches['SYMBOL'].iloc[0])
+        tools = [
+            Tool(
+                name="get stock data",
+                func=get_stock_price,
+                description=(
+                    "Use this tool to get stock price data. This tool will return three values: "
+                    "date, volume and closing price of the stock "
+                    f"for the period of 5 days. stock = {stock}"
+                ),
+            ),
+            Tool(
+                name="DuckDuckGo Search",
+                func=search.run,
+                description=(
+                    "Use this tool for for web search for searching details about stock like "
+                    "broker sentiment. You can also get recent stock "
+                    f"related news. stock name = {stockdesc}"
+                ),
+            ),
+        ]
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", prompt1),
+                ("placeholder", "{chat_history}"),
+                ("human", "{input}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ]
+        )
+        # The agent must exist before the executor is built. The earlier code built an
+        # executor from an unassigned `agent` and raised UnboundLocalError on every call.
+        agent = create_tool_calling_agent(llm, tools, prompt)
+        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+        response = agent_executor.invoke({"input": f"Get broker sentiment for the stock {stock} and stock name {stockdesc}"})
+        return f"Broker sentiment analysis for {stock}. - {response['output']}"
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+    #################
+# Fetch financial statements from Yahoo Finance
+def get_balancesheet(ticker):
+    """Fetch a company's balance sheet from Yahoo Finance with gaps filled.
+
+    Args:
+        ticker: Stock symbol. Anything after the first "." is dropped and
+            ".NS" is appended before querying.
+
+    Returns:
+        The balance-sheet dataframe after forward-filling missing values and
+        dropping rows that still contain NaN.
+    """
+    ticker = ticker.split(".")[0] + ".NS"
+    company = yf.Ticker(ticker)
+    # fillna(method='ffill') is deprecated in pandas 2.x. ffill()/dropna() also return
+    # new frames, so the object handed back by yfinance is left unmodified.
+    return company.balance_sheet.ffill().dropna()
+def get_incomestatement(ticker):
+    """Fetch a company's income statement from Yahoo Finance with gaps filled.
+
+    Args:
+        ticker: Stock symbol. Anything after the first "." is dropped and
+            ".NS" is appended before querying.
+
+    Returns:
+        The ``financials`` dataframe after forward-filling missing values and
+        dropping rows that still contain NaN.
+    """
+    ticker = ticker.split(".")[0] + ".NS"
+    company = yf.Ticker(ticker)
+    # fillna(method='ffill') is deprecated in pandas 2.x. ffill()/dropna() also return
+    # new frames, so the object handed back by yfinance is left unmodified.
+    return company.financials.ffill().dropna()
+def get_ticker(company_name):
+    """Append ".NS" to the given symbol and return the 'symbol' Yahoo Finance reports for it."""
+    nse_symbol = company_name + ".NS"
+    return yf.Ticker(nse_symbol).info['symbol']
+def get_financialratio(model, input,stock):
+    """Answer a financial-ratio question about a company from its Yahoo Finance statements.
+
+    The LLM is consulted four times: to check that the query is about a
+    financial ratio, to choose between balance sheet and income statement,
+    to pick the statement columns needed, and to produce the answer as JSON.
+
+    Args:
+        model: Model identifier passed to ``get_model``.
+        input: The user's question (the name shadows the builtin but is kept
+            for callers).
+        stock: Company-name search text passed to ``get_companynames``.
+
+    Returns:
+        "For the company: <name>, Here are the details: <parsed JSON>", or a
+        short explanatory / error message when a step cannot be completed.
+    """
+    stock_name = get_companynames(stock)
+    llm = get_model(model)
+    if not stock_name:
+        return "This company has not made any announcements"
+    # Reject an empty query before doing any network lookups.
+    if not input or not str(input).strip():
+        return "No query has been entered!"
+
+    def _choice(reply):
+        # Models often pad a single-letter answer with whitespace, quotes or a full stop.
+        return reply.content.strip().strip("'\".").upper()
+
+    stockname = stock_name[0]
+    dfc = pd.read_csv('nse_data_old.csv')
+    stock1 = dfc[dfc['COMPANY NAME'] == stockname]['SYMBOL'].iloc[0]
+    stock = get_ticker(stock1)
+    print("stock is ",stock)
+    resp = llm.invoke(
+        "You have to answer either 'A' or 'B' without any leading sentences - "
+        f"check whether the input {input} pertains to financial ratio query. "
+        "If it pertains to financial ratio query, "
+        "respond with letter 'A', else with letter 'B' if it contains only something like company name"
+    )
+    if _choice(resp) == 'B':
+        return "Enter a query pertaining to financial ratios!"
+    resp2 = llm.invoke(
+        "Answer A, if balance sheet or B, if income statement. "
+        f"To answer the query {input}, "
+        "whether balance sheet or income statement required - If balance sheet, answer A, else B"
+    )
+    if _choice(resp2) == 'A':
+        df1 = get_balancesheet(f'{stock}')
+    else:
+        df1 = get_incomestatement(f'{stock}')
+    # Transpose so that statement line items become columns.
+    df = df1.T
+    cols = df.columns.tolist()
+    resp3 = llm.invoke(
+        f"List the column names, as python list, in {cols} needed for {input} calculation. "
+        "Do not output any sentence other than column names. "
+        "For example, do not output leading answer statements like: Here are the column names needed for .."
+    )
+    try:
+        # The reply is model-generated: it may not be a valid literal or may name
+        # columns that do not exist, so both steps are guarded.
+        wanted = ast.literal_eval(resp3.content.strip())
+        df_new = df[wanted]
+    except (ValueError, SyntaxError, KeyError, TypeError) as e:
+        return f"An error occurred: {str(e)}"
+    parser = JsonOutputParser()
+    prompt = PromptTemplate(
+        template="Answer the user query.\n{format_instructions}\n{query}\n",
+        input_variables=["query"],
+        partial_variables={"format_instructions": parser.get_format_instructions()},
+    )
+    chain = prompt | llm | parser
+    try:
+        response = chain.invoke(f"Using {df_new}, {input}?")
+        return f"For the company: {stockname}, Here are the details: {response}"
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+##########################
+# Functions to plot a chart over ratios - this has scope for major enhancements!
+def plot_chart(data):
+    """Draw a bar chart for the first entry of a nested mapping.
+
+    ``data`` is expected to map a label to an inner mapping; the inner keys
+    go on the x axis and the inner values become bar heights.
+
+    Returns:
+        The ``plt`` module with the new figure current, or ``None`` if
+        anything goes wrong (including ``data`` being ``None``).
+    """
+    try:
+        first_key = list(data.keys())[0]
+        plt.figure(figsize=(8, 6))
+        series = data[first_key]
+        plt.bar(series.keys(), series.values())
+        plt.title(f"{first_key} Over Years")
+        plt.xlabel("Year")
+        plt.ylabel(first_key)
+        plt.tight_layout()
+    except Exception:
+        return None
+    return plt
+# def get_chart(input):
+#     response = get_financialratio(model,input)
+#     plt = plot_chart(response)
+#     return plt
+def _extract_ratio_dict(response):
+    """Return the dict literal embedded in a get_financialratio reply, or None."""
+    dict_match = re.search(r"\{.*\}", response, re.DOTALL)
+    if not dict_match:
+        return None
+    try:
+        parsed = ast.literal_eval(dict_match.group(0))
+    except (ValueError, SyntaxError):
+        # Braces were present (e.g. inside an error message) but are not a Python literal.
+        return None
+    return parsed if isinstance(parsed, dict) else None
+def get_chart(model,input,stock):
+    """Run the financial-ratio query and plot the result.
+
+    Returns:
+        Whatever ``plot_chart`` returns for the extracted dict, or ``None``
+        when no company matches ``stock``.
+    """
+    if not get_companynames(stock):
+        return None
+    response = get_financialratio(model,input,stock)
+    return plot_chart(_extract_ratio_dict(response))
+def combined_ratio(model, input,stock):
+    """Return the ratio answer text and its chart.
+
+    The LLM pipeline runs once and the chart is drawn from that same reply.
+    Running it twice would double the model calls and could yield a chart
+    that disagrees with the text.
+    """
+    response = get_financialratio(model,input,stock)
+    chart = plot_chart(_extract_ratio_dict(response)) if get_companynames(stock) else None
+    return response, chart
+###############################
+###############################
+# Create the Gradio Blocks interface with a title and description
+##################################
+# Refresh status: 0 = the latest NSE fetch succeeded, 1 = the backup CSV was used instead.
+# Assigned here because a module-level `global flag` statement does not create the
+# variable, and combined_function1 reads it.
+flag = 0
+def incremental_process():
+    """Fetch the latest NSE announcements and fold new rows into the FAISS stores.
+
+    Rows of the fresh download that are absent from the start-up snapshot
+    (``df_old`` / ``df_old2``) are written to ``nse_data_add1.csv`` and
+    ``nse_data_add2.csv``, embedded, and merged into the on-disk
+    "vectorstore" and "vectorstore2" indexes. The three module-level stores
+    are then reloaded from disk. ``dfco.csv`` receives the start-up rows plus
+    the new rows.
+
+    Returns:
+        0 when the NSE download succeeded, 1 when it failed and
+        ``df_backup.csv`` was used instead.
+    """
+    global vectorstore,vectorstore2,vectorstore3, flag
+    try:
+        df_new, _ = get_pd(1)
+        flag = 0
+    except Exception as e:
+        # Not a bare except: Ctrl-C / SystemExit must still propagate.
+        print("NSE fetch failed, falling back to df_backup.csv: ", e)
+        df_new = pd.read_csv("df_backup.csv")
+        flag = 1
+    df_new.to_csv("df_new.csv",index=False)
+    print("length of df_new ",len(df_new))
+    print("length of df_old ", len(df_old))
+    # Drop the same columns that were dropped from the start-up snapshot.
+    df_new.drop(['RECEIPT','DISSEMINATION','DIFFERENCE'],axis=1,inplace=True)
+    # Rows present only in the new download form the increment for the first store.
+    df_merged = df_new.merge(df_old, how='left', indicator=True)
+    df_add1 = df_merged[df_merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+    df_add1.to_csv("nse_data_add1.csv", index=False)
+    # Same again without ATTACHMENT for the second store.
+    df_new2 = df_new.drop(['ATTACHMENT'],axis=1)
+    df_merged = df_new2.merge(df_old2, how='left', indicator=True)
+    df_add2 = df_merged[df_merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+    df_add2.to_csv("nse_data_add2.csv", index=False)
+    # Combined view (start-up rows + increment), read back from the CSV files.
+    dfold = pd.read_csv('nse_data_old.csv')
+    dfadd = pd.read_csv('nse_data_add1.csv')
+    if dfadd.empty:
+        dfco = dfold.copy()
+    else:
+        dfco = pd.concat([dfold, dfadd], ignore_index=True)
+    dfco.to_csv("dfco.csv",index=False)
+    # Incremental RAG: embed only the additional rows and merge them into the saved stores.
+    data_new1 = CSVLoader("nse_data_add1.csv").load()
+    data_new2 = CSVLoader("nse_data_add2.csv").load()
+    len1 = len(dfold) + len(dfadd)
+    len2 = vectorstore.index.ntotal
+    print("len1 old + new csv ",len1)
+    print("original size ",len2)
+    # NOTE(review): the increment is always computed against the start-up snapshot, so a
+    # later refresh can re-merge rows that an earlier refresh already added - confirm
+    # whether duplicates in the stores matter.
+    if len1 != len2:
+        # Skip an empty increment: there is nothing to embed or merge.
+        if data_new1:
+            vectorstore_add1 = FAISS.from_documents(data_new1, embedding_function)
+            print("incremental size ",vectorstore_add1.index.ntotal)
+            vectorstore_new1 = FAISS.load_local("vectorstore",embedding_function,allow_dangerous_deserialization=True)
+            vectorstore_new1.merge_from(vectorstore_add1)
+            vectorstore_new1.save_local("vectorstore")
+            print("new size ",vectorstore_new1.index.ntotal)
+        if data_new2:
+            vectorstore_add2 = FAISS.from_documents(data_new2, embedding_function)
+            print("incremental size ",vectorstore_add2.index.ntotal)
+            vectorstore_new2 = FAISS.load_local("vectorstore2",embedding_function,allow_dangerous_deserialization=True)
+            vectorstore_new2.merge_from(vectorstore_add2)
+            vectorstore_new2.save_local("vectorstore2")
+            print("new size ",vectorstore_new2.index.ntotal)
+    # Reload the module-level stores from disk so later queries see the merged data.
+    vectorstore = FAISS.load_local("vectorstore",embedding_function,allow_dangerous_deserialization=True)
+    print("final size store 1",vectorstore.index.ntotal)
+    vectorstore2 = FAISS.load_local("vectorstore2",embedding_function,allow_dangerous_deserialization=True)
+    print("final size store 2",vectorstore2.index.ntotal)
+    vectorstore3 = FAISS.load_local("vectorstore3",embedding_function,allow_dangerous_deserialization=True)
+    print("final size store 3",vectorstore3.index.ntotal)
+    return flag
+def get_colist2():
+    """Build two company-name lists from dfco.csv.
+
+    The de-duplicated 'COMPANY NAME' column is also written to
+    companies.csv.
+
+    Returns:
+        ``(co_list2, co_list3)``: the names that also appear in the
+        module-level ``co_list``, and the first ten distinct names.
+    """
+    distinct = pd.read_csv('dfco.csv')[['COMPANY NAME']].drop_duplicates()
+    distinct.to_csv('companies.csv', index=False)
+    first_ten = distinct.head(10)['COMPANY NAME'].unique().tolist()
+    in_watchlist = distinct['COMPANY NAME'].isin(co_list)
+    watched = distinct[in_watchlist]['COMPANY NAME'].tolist()
+    return watched, first_ten
+def get_timestampmessage(flag):
+    """Build the status line showing how recent the stored filing data is.
+
+    Args:
+        flag: 1 when the last NSE fetch failed, any other value otherwise.
+
+    Returns:
+        A message naming the latest 'BROADCAST DATE/TIME' found in dfco.csv.
+    """
+    stamps = pd.read_csv('dfco.csv')['BROADCAST DATE/TIME']
+    # The values are "dd-Mon-YYYY HH:MM:SS" strings (the format refresh() parses), so a
+    # plain string max() is lexicographic ("31-Jan" > "01-Feb"). Compare as datetimes.
+    parsed = pd.to_datetime(stamps, format="%d-%b-%Y %H:%M:%S", errors="coerce")
+    if parsed.notna().any():
+        timestamp = stamps[parsed.idxmax()]
+    else:
+        # Nothing parsed; fall back to the string maximum.
+        timestamp = stamps.max()
+    if flag == 1:
+        return f"There is NSE timeout error. The latest filing information is available upto {timestamp}"
+    return f"Latest filing information is available upto {timestamp}"
+def update():
+    """Run the incremental refresh, store its status in the global flag, and return the status message."""
+    global flag
+    flag = incremental_process()
+    return get_timestampmessage(flag)
+def give_time():
+    """Return the most recent 'BROADCAST DATE/TIME' string stored in dfco.csv."""
+    stamps = pd.read_csv("dfco.csv")['BROADCAST DATE/TIME']
+    # The values are "dd-Mon-YYYY HH:MM:SS" strings (the format refresh() parses), so a
+    # plain string max() is lexicographic ("31-Jan" > "01-Feb"). Compare as datetimes
+    # and hand back the original text of the latest one.
+    parsed = pd.to_datetime(stamps, format="%d-%b-%Y %H:%M:%S", errors="coerce")
+    if parsed.notna().any():
+        return stamps[parsed.idxmax()]
+    # Nothing parsed; fall back to the string maximum.
+    return stamps.max()
+# Timezone in which the stored broadcast timestamps are interpreted
+ist_timezone = pytz.timezone("Asia/Kolkata")
+# UTC, used for the current time and for the comparison
+utc_timezone = pytz.utc
+def refresh():
+    """Run an incremental update if the stored data is more than one hour old.
+
+    The latest broadcast timestamp from ``give_time`` is treated as IST and
+    compared, in UTC, with the current time.
+
+    Returns:
+        The message from ``update()`` when a refresh ran, otherwise a notice
+        that a refresh is only allowed for data older than one hour.
+    """
+    timestamp_str = give_time()
+    naive_latest = datetime.strptime(timestamp_str, "%d-%b-%Y %H:%M:%S")
+    latest_ist = ist_timezone.localize(naive_latest)
+    now_utc = datetime.now(tz=utc_timezone)
+    latest_utc = latest_ist.astimezone(utc_timezone)
+    time_difference = now_utc - latest_utc
+    print("the time diff is ", time_difference)
+    if not time_difference > timedelta(hours=1):
+        return f"Refresh allowed only if data is stale for more than one hour. Current client timestamp: {timestamp_str}"
+    message1 = update()
+    print("Incremental update run")
+    return message1
+    ##########################################################################
+def plot1_top_20():
+    """Plot how many announcements fall under each selected subject, across all companies.
+
+    Reads nse_data_old.csv, keeps rows whose SUBJECT is in a fixed list,
+    shortens one very long subject label, and draws a horizontal bar chart
+    of the counts. Despite the name, no top-20 cut is applied.
+
+    Returns:
+        The ``plt`` module with the new figure current.
+    """
+    long_label = 'Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent'
+    subjects = ['Acquisition',
+                'Alteration Of Capital and Fund Raising-XBRL',
+                'Analysts/Institutional Investor Meet/Con. Call Updates',
+                'Board Meeting Intimation',
+                'Book Closure',
+                long_label,
+                'Change in Management',
+                'Credit Rating',
+                'Disclosure of material issue',
+                'Dividend',
+                'Financial Result Updates',
+                'Investor Presentation',
+                'Notice Of Shareholders Meetings-XBRL',
+                'Related Party Transactions',
+                'Resignation',
+                'Rights Issue',
+                'Shareholders meeting',
+                'Spurt in Volume',
+                'Update-Acquisition/Scheme/Sale/Disposal-XBRL',
+                ]
+    df = pd.read_csv('nse_data_old.csv')
+    # .copy() makes the filtered rows an independent frame, so the column assignment
+    # below does not raise SettingWithCopyWarning.
+    df = df[df['SUBJECT'].isin(subjects)].copy()
+    df['SUBJECT'] = df['SUBJECT'].replace(long_label, 'Change in Key Managerial Personnel')
+    value_counts = df['SUBJECT'].value_counts()
+    plt.figure(figsize=(10, 6))
+    plt.barh(value_counts.index, value_counts.values)
+    plt.xlabel('Count')
+    plt.ylabel('Announcements')
+    plt.title('NSE Corporate Announcements - A Glance')
+    plt.tight_layout()
+    return plt
+  ## Function to create company list specific chart
+def plot2_top_20():
+    """Plot announcement counts per subject for the tracked / NIFTY companies only.
+
+    Reads nse_data_old.csv, keeps rows whose COMPANY NAME is in the first
+    list returned by ``get_colist2``, shortens one very long subject label,
+    and draws a horizontal bar chart of the counts. All subjects are
+    included; no subject filter or top-20 cut is applied.
+
+    Returns:
+        The ``plt`` module with the new figure current.
+    """
+    co_list2, _ = get_colist2()
+    df = pd.read_csv('nse_data_old.csv')
+    # .copy() makes the filtered rows an independent frame, so the column assignment
+    # below does not raise SettingWithCopyWarning.
+    df = df[df['COMPANY NAME'].isin(co_list2)].copy()
+    df['SUBJECT'] = df['SUBJECT'].replace(
+        'Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent',
+        'Change in Key Managerial Personnel')
+    value_counts = df['SUBJECT'].value_counts()
+    plt.figure(figsize=(10, 6))
+    plt.barh(value_counts.index, value_counts.values)
+    plt.xlabel('Count')
+    plt.ylabel('Announcements')
+    plt.title('NSE Corporate Announcements - Tracked Companies')
+    plt.tight_layout()
+    return plt
+def get_companynames(stock):
+    df = pd.read_csv('nse_data_old.csv')
+    if stock:
+      # Create a regular expression pattern
+      pattern = f'.*{stock}.*'
+      # Get rows where 'COMPANY NAME' contains the keyword (case-insensitive)
+      matched_rows = df[df['COMPANY NAME'].str.contains(pattern, case=False)]
+      # Get unique company names
+      unique_companies = matched_rows['COMPANY NAME'].unique()
+      return list(set(unique_companies))
+    else: return None
+# A combined function to be used in Gradio output box
+def print_model(llm):
+  co_list2,_ = get_colist2()
+  if co_list2:
+      return f"You are using {llm.model_name} model for this session. \n \n" \
+      f"These are the companies you track: {co_list_tracked}. \n \n" \
+      f"These are the companies, including those in NIFTY, that have filed any information with NSE either today / yesterday - {co_list2}"
+  else:
+      return f"You are using {llm.model_name} model for this session. \n \n" \
+      f"Your are tracking these companies: {co_list_tracked}, \n \n"\
+      f"None of the tracked companies or  NIFTY 50 have filed any information with NSE on either today or yesterday"
+def print_model1(llm):
+    return f"You are using {llm.model_name} model for this session. \n \n [Note: There is NSE timeout error preventing fetching of latest data. So, results may not be real-time / up-to-date]"
+def combined_function1(model,stock):
+  global flag
+  llm = get_model(model)
+  stock = get_companynames(stock)
+  if flag == 0:
+    return print_model(llm), give_announcement(llm,stock),get_ca(llm,stock),get_movements(llm,stock), get_sentiments(llm,stock)
+  else:
+    return print_model1(llm), give_announcement(llm,stock),get_ca(llm,stock),get_movements(llm,stock), get_sentiments(llm,stock)
+def get_model(model_name):
+  llm = ChatGroq(
+            api_key="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",
+            model=model_name,
+            max_tokens = 8192,
+            # model = 'gemma-7b-it',
+            temperature = 0
+            # model = 'mixtral-8x7B-32768'
+        )
+  return llm
+# This function is given here as company list is dynamic
+def give_names():
+  global co_list_tracked
+  co_list2, co_list3 = get_colist2()
+  return f"Apart from NIFTY, these are the companies you track: \n \n" \
+  f" {co_list_tracked}. \n \n" \
+  f"These are the tracked companies that have made announcements: \n \n" \
+  f"{co_list2}. \n \n" \
+  f"These are latest 10 companies that have made announcements: \n \n " \
+  f"{co_list3}"
+##############################
+retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
+  ###############################
+# This function is for chat queries. Given here due to retriever defined here
+def chat_chain(model,query):
+  llm = get_model(model)
+  if query=='':
+    return "Please enter a query!"
+  else:
+    combine_docs_chain = create_stuff_documents_chain(
+    llm, retrieval_qa_chat_prompt)
+    retriever2 = vectorstore2.as_retriever()
+    retrieval_chain = create_retrieval_chain(retriever2, combine_docs_chain)
+    response = retrieval_chain.invoke({"input": query})
+    return response['answer']
+#################################
+## Refresh the vector store with the latest data before the UI is built.
+## combined_function1 reads `flag`: 0 selects the regular session banner,
+## any other value the banner that carries the NSE-timeout notice.
+flag = incremental_process()
+###########################################################################
+## Gradio UI: static header, two default charts, then three interactive
+## sections (company analysis, financial-statement queries, chat) that all
+## share the model dropdown.
+with gr.Blocks() as demo:
+  # Add a Markdown block for the description
+  gr.Markdown("""<h1 style='color: blue;'>Chat and Analyze with NSE Filings Information</h1>""")
+  gr.Markdown("""Powered by Gradio, Groq, Llama3, FAISS, Langchain, YahooFinance""")
+  gr.Markdown(
+      """
+      <img src="https://upload.wikimedia.org/wikipedia/commons/1/12/NSE_Exchange_Plaza.jpg" width=500px>
+      Enter any company name to know its recent filings with NSE in real time. This app can track a list of companies for any corporate announcements \
+      with NSE (now NSE 50 hard coded). If you want to know whether any of the tracked company has made any announcements either yesterday or today,\
+      enter the company name and submit. The first output box will list all the companies (that are tracked and) that have made an announcement today. \
+      The second box provides details about the announcement. You can also do ratio analysis and chat with the filings information (beta).
+      """
+  )
+  # Two read-only text boxes filled once, when the layout is built. The name
+  # txt_output is rebound by the second assignment; nothing below reads it.
+  txt_output = gr.Text(give_time(),label = "Opening Data - Timestamp of latest Filing")
+  txt_output = gr.Text(give_names(),label = "Announcements for tracked companies")
+  # Default charts shown when the app is launched. Each plot function returns
+  # pyplot with its figure still open, so the figure is closed right after it
+  # has been handed to gr.Plot.
+  plot_output1 = gr.Plot(plot1_top_20(), label="Chart")  # Call the function to create the plot
+  plt.close()
+  plot_output2 = gr.Plot(plot2_top_20(), label="Chart")  # Call the function to create the plot
+  plt.close()
+  gr.Markdown("""<h2 style='color: blue;'>Fetch Announcements/Corporate Actions/Price Movements/Broker Sentiments</h2>""")
+  # Use a Column to structure the inputs and outputs
+  with gr.Column():
+        # Manual refresh: the button calls refresh() and shows its return value
+        # in the "Latest Filing Timestamp" box.
+        outputs5 = [gr.Textbox(label="Latest Filing Timestamp",placeholder="Refresh data if stale for more than an hour")]
+        button5 = gr.Button("Refresh Data")
+        # button5.click(lambda: refresh(dfco), inputs=None, outputs=outputs5)
+        button5.click(lambda: refresh(), inputs=None, outputs=outputs5)
+        # Create a dropdown box for selecting the operation
+        # (the selected model id is passed to all three handlers below)
+        operation_dropdown = gr.Dropdown(
+          label="Select a model",
+          choices=['llama3-70b-8192','llama3-8b-8192',  'gemma-7b-it','mixtral-8x7B-32768' ],  # Options for the dropdown
+          value='llama3-70b-8192',  # Default value
+      )
+      # First text input and button
+        text_input1 = gr.Textbox(
+          label="Enter Company Name",
+          placeholder="Enter a company name; e.g., Zydus Lifesciences Limited",
+          lines=1
+      )
+        button1 = gr.Button("Start Analysis")
+        # Order matters: these five boxes receive the 5-tuple returned by
+        # combined_function1, position by position.
+        outputs1 = [
+          gr.Textbox(label="Selected Model",show_copy_button=True),
+          gr.Textbox(label="Announcement Detail", max_lines=100,show_copy_button=True),
+          gr.Textbox(label="Any Corporate Actions during last week?", max_lines=100,show_copy_button=True),
+          gr.Textbox(label="Stock Price Movement", max_lines=100,show_copy_button=True),
+          gr.Textbox(label="Broker Sentiment", max_lines=100,show_copy_button=True),
+      ]
+        button1.click(lambda x,y: combined_function1(x,y), inputs=[operation_dropdown,text_input1], outputs=outputs1)
+        gr.Markdown("""<h1 style='color: green;'>Analyse the Financial Statements of the above Company</h1>""")
+        # Financial-statement section: combined_ratio receives the model, this
+        # query and the company name typed in text_input1 above.
+        text_input3 = gr.Textbox(
+          label="Enter Query",
+          placeholder="Enter your query: e.g., What is the current ratio of the stock over three years?",
+          lines=1)
+        button3 = gr.Button("Analyse")
+        outputs3 = [
+          gr.Textbox(label="Chat Response", max_lines=100,show_copy_button=True),
+          gr.Plot(label = "Chart")]
+        button3.click(combined_ratio, inputs=[operation_dropdown,text_input3,text_input1], outputs=outputs3)
+        gr.Markdown("""<h1 style='color: orange;'>Chat With the NSE Filings Information</h1>""")
+      # Second text input and button
+        # Chat section: chat_chain receives the model and the chat query.
+        text_input2 = gr.Textbox(
+          label="Enter Chat Query",
+          placeholder="Enter your query: e.g., List the companies that have recently made acquisitions",
+          lines=2
+      )
+        button2 = gr.Button("Chat")
+        outputs2 = [gr.Textbox(label="Chat Response", max_lines=100,lines=10,show_copy_button=True)]
+                  # gr.Plot(label = "Categories")]
+        button2.click(chat_chain, inputs=[operation_dropdown,text_input2], outputs=outputs2)
+# Launch the Gradio app
+demo.launch()

ind_nifty50list.csv ADDED Viewed

	@@ -0,0 +1,51 @@

+Company Name,Industry,Symbol,Series,ISIN Code
+Adani Enterprises Ltd.,Metals & Mining,ADANIENT,EQ,INE423A01024
+Adani Ports and Special Economic Zone Ltd.,Services,ADANIPORTS,EQ,INE742F01042
+Apollo Hospitals Enterprise Ltd.,Healthcare,APOLLOHOSP,EQ,INE437A01024
+Asian Paints Ltd.,Consumer Durables,ASIANPAINT,EQ,INE021A01026
+Axis Bank Ltd.,Financial Services,AXISBANK,EQ,INE238A01034
+Bajaj Auto Ltd.,Automobile and Auto Components,BAJAJ-AUTO,EQ,INE917I01010
+Bajaj Finance Ltd.,Financial Services,BAJFINANCE,EQ,INE296A01024
+Bajaj Finserv Ltd.,Financial Services,BAJAJFINSV,EQ,INE918I01026
+Bharat Petroleum Corporation Ltd.,Oil Gas & Consumable Fuels,BPCL,EQ,INE029A01011
+Bharti Airtel Ltd.,Telecommunication,BHARTIARTL,EQ,INE397D01024
+Britannia Industries Ltd.,Fast Moving Consumer Goods,BRITANNIA,EQ,INE216A01030
+Cipla Ltd.,Healthcare,CIPLA,EQ,INE059A01026
+Coal India Ltd.,Oil Gas & Consumable Fuels,COALINDIA,EQ,INE522F01014
+Divi's Laboratories Ltd.,Healthcare,DIVISLAB,EQ,INE361B01024
+Dr. Reddy's Laboratories Ltd.,Healthcare,DRREDDY,EQ,INE089A01023
+Eicher Motors Ltd.,Automobile and Auto Components,EICHERMOT,EQ,INE066A01021
+Grasim Industries Ltd.,Construction Materials,GRASIM,EQ,INE047A01021
+HCL Technologies Ltd.,Information Technology,HCLTECH,EQ,INE860A01027
+HDFC Bank Ltd.,Financial Services,HDFCBANK,EQ,INE040A01034
+HDFC Life Insurance Company Ltd.,Financial Services,HDFCLIFE,EQ,INE795G01014
+Hero MotoCorp Ltd.,Automobile and Auto Components,HEROMOTOCO,EQ,INE158A01026
+Hindalco Industries Ltd.,Metals & Mining,HINDALCO,EQ,INE038A01020
+Hindustan Unilever Ltd.,Fast Moving Consumer Goods,HINDUNILVR,EQ,INE030A01027
+ICICI Bank Ltd.,Financial Services,ICICIBANK,EQ,INE090A01021
+ITC Ltd.,Fast Moving Consumer Goods,ITC,EQ,INE154A01025
+IndusInd Bank Ltd.,Financial Services,INDUSINDBK,EQ,INE095A01012
+Infosys Ltd.,Information Technology,INFY,EQ,INE009A01021
+JSW Steel Ltd.,Metals & Mining,JSWSTEEL,EQ,INE019A01038
+Kotak Mahindra Bank Ltd.,Financial Services,KOTAKBANK,EQ,INE237A01028
+LTIMindtree Ltd.,Information Technology,LTIM,EQ,INE214T01019
+Larsen & Toubro Ltd.,Construction,LT,EQ,INE018A01030
+Mahindra & Mahindra Ltd.,Automobile and Auto Components,M&M,EQ,INE101A01026
+Maruti Suzuki India Ltd.,Automobile and Auto Components,MARUTI,EQ,INE585B01010
+NTPC Ltd.,Power,NTPC,EQ,INE733E01010
+Nestle India Ltd.,Fast Moving Consumer Goods,NESTLEIND,EQ,INE239A01024
+Oil & Natural Gas Corporation Ltd.,Oil Gas & Consumable Fuels,ONGC,EQ,INE213A01029
+Power Grid Corporation of India Ltd.,Power,POWERGRID,EQ,INE752E01010
+Reliance Industries Ltd.,Oil Gas & Consumable Fuels,RELIANCE,EQ,INE002A01018
+SBI Life Insurance Company Ltd.,Financial Services,SBILIFE,EQ,INE123W01016
+Shriram Finance Ltd.,Financial Services,SHRIRAMFIN,EQ,INE721A01013
+State Bank of India,Financial Services,SBIN,EQ,INE062A01020
+Sun Pharmaceutical Industries Ltd.,Healthcare,SUNPHARMA,EQ,INE044A01036
+Tata Consultancy Services Ltd.,Information Technology,TCS,EQ,INE467B01029
+Tata Consumer Products Ltd.,Fast Moving Consumer Goods,TATACONSUM,EQ,INE192A01025
+Tata Motors Ltd.,Automobile and Auto Components,TATAMOTORS,EQ,INE155A01022
+Tata Steel Ltd.,Metals & Mining,TATASTEEL,EQ,INE081A01020
+Tech Mahindra Ltd.,Information Technology,TECHM,EQ,INE669C01036
+Titan Company Ltd.,Consumer Durables,TITAN,EQ,INE280A01028
+UltraTech Cement Ltd.,Construction Materials,ULTRACEMCO,EQ,INE481G01011
+Wipro Ltd.,Information Technology,WIPRO,EQ,INE075A01022

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+langchain
+langchain-groq
+sentence-transformers
+langchainhub
+faiss-cpu
+gradio
+gradio_client
+duckduckgo-search
+yfinance