michaelwechner committed on
Commit
608d44d
·
1 Parent(s): acd93cd

Squirro Docs added

Browse files
Files changed (1) hide show
  1. kg_builder/src/api_connections.py +41 -2
kg_builder/src/api_connections.py CHANGED
@@ -29,8 +29,13 @@ def get_graph_connection(data_source_name):
29
  url = os.getenv("TRAFFIC_NEO4J_URL")
30
  username = os.getenv("TRAFFIC_NEO4J_USERNAME")
31
  password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
 
 
 
 
32
  else:
33
- raise ValueError(f"No such Data Source connection configured: {data_source_name}")
 
34
 
35
  return Neo4jGraph(url=url, username=username, password=password)
36
 
@@ -87,6 +92,39 @@ def get_extraction_chain(
87
  Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
88
  """
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  elif data_source_name == "Traffic Law":
91
  # Traffic Law-specific prompt
92
  prompt_text = f"""# Knowledge Graph Instructions for GPT-4
@@ -121,7 +159,8 @@ def get_extraction_chain(
121
  """
122
 
123
  else:
124
- raise ValueError(f"No prompt configured for Data Source {data_source_name}")
 
125
 
126
  logger.info(f"Prompt to extract graph data: {prompt_text}")
127
 
 
29
  url = os.getenv("TRAFFIC_NEO4J_URL")
30
  username = os.getenv("TRAFFIC_NEO4J_USERNAME")
31
  password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
32
+ elif data_source_name == "SquirroDocs":
33
+ url = os.getenv("TRAFFIC_NEO4J_URL")
34
+ username = os.getenv("TRAFFIC_NEO4J_USERNAME")
35
+ password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
36
  else:
37
+ errorMsg = f"No such Data Source connection configured: {data_source_name}"
38
+ raise ValueError(errorMsg)
39
 
40
  return Neo4jGraph(url=url, username=username, password=password)
41
 
 
92
  Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
93
  """
94
 
95
+ elif data_source_name == "SquirroDocs":
96
+ # Squirro Docs-specific prompt
97
+ prompt_text = f"""# Knowledge Graph Instructions for GPT-4
98
+ ## 1. Overview
99
+ You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about Squirro documentation.
100
+ - **Nodes** symbolize entities such as types of traffic violations, penalties, driving regulations, and relevant legal statutes.
101
+ - The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for legal professionals, law enforcement agencies, and the general public.
102
+
103
+ ## 2. Labeling Nodes
104
+ - **Consistency**: Utilize uniform labels for node types to maintain clarity.
105
+ - For instance, consistently label violations as **"Violation"**, penalties as **"Penalty"**, and statutes as **"Statute"**.
106
+ - **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
107
+ {'- **Allowed Node Labels:**' + ", ".join(['Violation', 'Penalty', 'Statute', 'VehicleType', 'LegalDocument']) if allowed_nodes else ""}
108
+ {'- **Allowed Relationship Types**:' + ", ".join(['Violates', 'Penalizes', 'Governs', 'Cites']) if allowed_rels else ""}
109
+
110
+ ## 3. Handling Numerical Data and Dates
111
+ - Integrate numerical data and dates as attributes of the corresponding nodes.
112
+ - **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
113
+ - **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `fineAmount`, `lawEffectiveDate`.
114
+
115
+ ## 4. Coreference Resolution
116
+ - **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
117
+ - For example, if "Vehicle Code 22350" and "Speed Law" reference the same statute, uniformly apply "Vehicle Code 22350" as the node ID.
118
+
119
+ ## 5. Relationship Naming Conventions
120
+ - **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
121
+ - For instance, use "IS_PENALIZED_BY" instead of "ISPENALIZEDBY", use "IS_GOVERNED_BY" instead of "ISGOVERNEDBY" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
122
+ - **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "REQUIRES" or "PROHIBITS" for legal requirements or prohibitions.
123
+
124
+ ## 6. Strict Compliance
125
+ Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
126
+ """
127
+
128
  elif data_source_name == "Traffic Law":
129
  # Traffic Law-specific prompt
130
  prompt_text = f"""# Knowledge Graph Instructions for GPT-4
 
159
  """
160
 
161
  else:
162
+ errorMsg = f"No prompt configured for Data Source ¨{data_source_name}¨!"
163
+ raise ValueError(errorMsg)
164
 
165
  logger.info(f"Prompt to extract graph data: {prompt_text}")
166