from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from knowledge_graph_builder import extract_and_store_graph
from dotenv import load_dotenv
from tqdm import tqdm

# Load environment variables (e.g. OPENAI_API_KEY, database credentials)
load_dotenv()

# Articles to load: {category: Wikipedia article title}
articles = {
    "Chemotherapy": "Chemotherapy",
    "Traffic Law": "Traffic laws in the United States"
}


def build_graph_for_article(article_name, category):
    print(f"Loading documents for: {article_name}")

    # Load the Wikipedia article; bail out early if nothing was found
    raw_documents = WikipediaLoader(query=article_name).load()
    if not raw_documents:
        print(f"Failed to load content for {article_name}")
        return

    # Split the first five documents into token-sized chunks
    text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)
    documents = text_splitter.split_documents(raw_documents[:5])

    print("Building the knowledge graph...")
    for document in tqdm(documents):
        extract_and_store_graph(document, category)


def main():
    for category, title in articles.items():
        build_graph_for_article(title, category)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Legacy interactive version, kept for reference.
# ---------------------------------------------------------------------------
# import os
#
# from openai import OpenAI
#
# from api_connections import get_graph_connection
# from knowledge_graph_builder import extract_and_store_graph
# from query_graph import query_knowledge_graph
# from langchain_community.document_loaders import WikipediaLoader
# from langchain.text_splitter import TokenTextSplitter
# from tqdm import tqdm
#
#
# def get_llm():
#     api_key = os.getenv("OPENAI_API_KEY")
#     if not api_key:
#         raise ValueError("No OpenAI API key found in environment variables.")
#     return OpenAI(api_key=api_key)
#
#
# def classify_query(query):
#     llm = get_llm()
#     # Note: with the openai>=1.0 client, completions are created via
#     # `client.completions.create`; text-davinci-003 has been retired, and
#     # gpt-3.5-turbo-instruct is its closest replacement.
#     response = llm.completions.create(
#         model="gpt-3.5-turbo-instruct",  # Update to the latest model as necessary
#         prompt=f"Classify the following query into 'Chemotherapy' or 'Traffic Law': {query}",
#         max_tokens=60
#     )
#     return response.choices[0].text.strip()
#
#
# def main():
#     print("Starting the script...")
#
#     # Take a Wikipedia article name as input
#     article_name = input("Enter the Wikipedia article name: ")
#     print(f"Loading documents for: {article_name}")
#
#     # Load and process the Wikipedia article
#     raw_documents = WikipediaLoader(query=article_name).load()
#     text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)
#     documents = text_splitter.split_documents(raw_documents[:5])  # Only process the first 5 documents
#
#     print("Building the knowledge graph...")
#     for d in tqdm(documents):
#         extract_and_store_graph(d)
#
#     print("Graph construction complete. Please enter your query.")
#
#     # Query the graph and print the answer
#     user_query = input("Enter your query related to the graph: ")
#     print(f"Querying the graph with: {user_query}")
#     answer = query_knowledge_graph(user_query)
#     print("Answer to your query:", answer)
#
#
# if __name__ == "__main__":
#     main()
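

# ---------------------------------------------------------------------------
# For reference only: a minimal sketch of what `extract_and_store_graph` is
# assumed to do. The real implementation lives in knowledge_graph_builder.py
# and may differ; the classes used below (Neo4jGraph, LLMGraphTransformer,
# ChatOpenAI) are an assumption about its internals, not part of this file.
# ---------------------------------------------------------------------------
# from langchain_community.graphs import Neo4jGraph
# from langchain_experimental.graph_transformers import LLMGraphTransformer
# from langchain_openai import ChatOpenAI
#
#
# def extract_and_store_graph(document, category):
#     # Connect to Neo4j (reads NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD
#     # from the environment loaded by load_dotenv above).
#     graph = Neo4jGraph()
#
#     # Let the LLM extract (node, relationship, node) triples from the chunk.
#     transformer = LLMGraphTransformer(llm=ChatOpenAI(temperature=0))
#     graph_documents = transformer.convert_to_graph_documents([document])
#
#     # Persist the extracted subgraph; `category` could be stored as a node
#     # property or label so that queries can be scoped per topic.
#     graph.add_graph_documents(graph_documents)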