from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from knowledge_graph_builder import extract_and_store_graph
from dotenv import load_dotenv
from tqdm import tqdm
# Load environment variables
load_dotenv()
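# (assumption: the downstream modules read OPENAI_API_KEY and the graph-store
# credentials from this .env file; adjust names to your setup)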
# Define articles to load
articles = {
    "Chemotherapy": "Chemotherapy",
    "Traffic Law": "Traffic laws in the United States"
}

def build_graph_for_article(article_name, category):
    print(f"Loading documents for: {article_name}")
    # Load and process the Wikipedia article
    raw_documents = WikipediaLoader(query=article_name).load()
    if not raw_documents:
        print(f"Failed to load content for {article_name}")
        return
    text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)
    documents = text_splitter.split_documents(raw_documents[:5])  # Only process the first 5 documents
    print("Building the knowledge graph...")
    for document in tqdm(documents):
        extract_and_store_graph(document, category)

def main():
    for category, title in articles.items():
        build_graph_for_article(title, category)

if __name__ == "__main__":
    main()
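
# A minimal sketch of what knowledge_graph_builder.extract_and_store_graph is
# assumed to do (hypothetical; the real module is not shown here), using
# LangChain's experimental LLMGraphTransformer with a Neo4j store:
#
#     from langchain_community.graphs import Neo4jGraph
#     from langchain_experimental.graph_transformers import LLMGraphTransformer
#     from langchain_openai import ChatOpenAI
#
#     graph = Neo4jGraph()  # reads NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD from env
#     transformer = LLMGraphTransformer(llm=ChatOpenAI(temperature=0))
#
#     def extract_and_store_graph(document, category):
#         # Convert one document chunk into graph documents (nodes + relationships)
#         graph_docs = transformer.convert_to_graph_documents([document])
#         graph.add_graph_documents(graph_docs)
#         # `category` could then be attached as a node property via a Cypher query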
# Previous version of this script, kept for reference:
#
# import os
# from openai import OpenAI
# from api_connections import get_graph_connection
# from knowledge_graph_builder import extract_and_store_graph
# from query_graph import query_knowledge_graph
# from langchain_community.document_loaders import WikipediaLoader
# from langchain.text_splitter import TokenTextSplitter
# from tqdm import tqdm
#
# def get_llm():
#     api_key = os.getenv("OPENAI_API_KEY")
#     if not api_key:
#         raise ValueError("No OpenAI API key found in environment variables.")
#     return OpenAI(api_key=api_key)
# def classify_query(query):
#     llm = get_llm()
#     response = llm.completions.create(  # v1 OpenAI clients expose lowercase `completions`, not `Completion`
#         model="gpt-3.5-turbo-instruct",  # text-davinci-003 was retired; update the model as necessary
#         prompt=f"Classify the following query into 'Chemotherapy' or 'Traffic Law': {query}",
#         max_tokens=60
#     )
#     return response.choices[0].text.strip()
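# Hedged alternative: the completions endpoint above is legacy. The same
# classification step with the current chat completions API would look like
# this (model name is an assumption; any chat-capable model works):
#
#     response = llm.chat.completions.create(
#         model="gpt-4o-mini",
#         messages=[{"role": "user", "content": f"Classify the following query into 'Chemotherapy' or 'Traffic Law': {query}"}],
#         max_tokens=60,
#     )
#     return response.choices[0].message.content.strip()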
# def main():
#     print("Starting the script...")
#     # Take Wikipedia article name as input
#     article_name = input("Enter the Wikipedia article name: ")
#     print(f"Loading documents for: {article_name}")
#     # Load and process the Wikipedia article
#     raw_documents = WikipediaLoader(query=article_name).load()
#     text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)
#     documents = text_splitter.split_documents(raw_documents[:5])  # Only process the first 5 documents
#     print("Building the knowledge graph...")
#     # Build the knowledge graph from the documents
#     for d in tqdm(documents):
#         extract_and_store_graph(d)
#     print("Graph construction complete. Please enter your query.")
#     # Take a query related to the graph
#     user_query = input("Enter your query related to the graph: ")
#     print(f"Querying the graph with: {user_query}")
#     # Query the graph and print the answer
#     answer = query_knowledge_graph(user_query)
#     print("Answer to your query:", answer)
#
# if __name__ == "__main__":
#     main()