michaelwechner
committed on
Commit
·
7206760
1
Parent(s):
56403af
loading content from public website started
Browse files
kg_builder/src/graph_creation.py
CHANGED
@@ -24,6 +24,9 @@ load_dotenv()
|
|
24 |
articles = {
|
25 |
"Traffic Law": "Traffic laws in the United States"
|
26 |
}
|
|
|
|
|
|
|
27 |
|
28 |
def build_graph_for_article(query, data_source_name):
|
29 |
"""
|
@@ -38,10 +41,15 @@ def build_graph_for_article(query, data_source_name):
|
|
38 |
chunk_size=400
|
39 |
chunk_overlap=10
|
40 |
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
if not raw_documents:
|
44 |
-
logger.error(f"Failed to load content for
|
45 |
return
|
46 |
|
47 |
logger.info(f"{str(len(raw_documents))} document(s) loaded from Wikipedia.")
|
|
|
24 |
articles = {
|
25 |
"Traffic Law": "Traffic laws in the United States"
|
26 |
}
|
27 |
+
#articles = {
|
28 |
+
# "SquirroDocs": "https://docs.squirro.com/en/latest/technical/getting-started.html"
|
29 |
+
#}
|
30 |
|
31 |
def build_graph_for_article(query, data_source_name):
|
32 |
"""
|
|
|
41 |
chunk_size=400
|
42 |
chunk_overlap=10
|
43 |
|
44 |
+
if data_source_name == "SquirroDocs":
|
45 |
+
logger.info(f"Loading document(s) from public website {query} ...")
|
46 |
+
raw_documents = None
|
47 |
+
else:
|
48 |
+
logger.info(f"Loading document(s) from Wikipedia using query '{query}' ...")
|
49 |
+
raw_documents = WikipediaLoader(query=query, load_max_docs=load_max_documents).load()
|
50 |
+
|
51 |
if not raw_documents:
|
52 |
+
logger.error(f"Failed to load content for Data Source '{data_source_name}'!")
|
53 |
return
|
54 |
|
55 |
logger.info(f"{str(len(raw_documents))} document(s) loaded from Wikipedia.")
|