michaelwechner committed on
Commit
7206760
·
1 Parent(s): 56403af

loading content from public website started

Browse files
Files changed (1) hide show
  1. kg_builder/src/graph_creation.py +11 -3
kg_builder/src/graph_creation.py CHANGED
@@ -24,6 +24,9 @@ load_dotenv()
24
  articles = {
25
  "Traffic Law": "Traffic laws in the United States"
26
  }
 
 
 
27
 
28
  def build_graph_for_article(query, data_source_name):
29
  """
@@ -38,10 +41,15 @@ def build_graph_for_article(query, data_source_name):
38
  chunk_size=400
39
  chunk_overlap=10
40
 
41
- logger.info(f"Loading document(s) from Wikipedia using query '{query}' ...")
42
- raw_documents = WikipediaLoader(query=query, load_max_docs=load_max_documents).load()
 
 
 
 
 
43
  if not raw_documents:
44
- logger.error(f"Failed to load content for query: {query}")
45
  return
46
 
47
  logger.info(f"{str(len(raw_documents))} document(s) loaded from Wikipedia.")
 
24
  articles = {
25
  "Traffic Law": "Traffic laws in the United States"
26
  }
27
+ #articles = {
28
+ # "SquirroDocs": "https://docs.squirro.com/en/latest/technical/getting-started.html"
29
+ #}
30
 
31
  def build_graph_for_article(query, data_source_name):
32
  """
 
41
  chunk_size=400
42
  chunk_overlap=10
43
 
44
+ if data_source_name == "SquirroDocs":
45
+ logger.info(f"Loading document(s) from public website {query} ...")
46
+ raw_documents = None
47
+ else:
48
+ logger.info(f"Loading document(s) from Wikipedia using query '{query}' ...")
49
+ raw_documents = WikipediaLoader(query=query, load_max_docs=load_max_documents).load()
50
+
51
  if not raw_documents:
52
+ logger.error(f"Failed to load content for Data Source '{data_source_name}'!")
53
  return
54
 
55
  logger.info(f"{str(len(raw_documents))} document(s) loaded from Wikipedia.")