michaelwechner
committed on
Commit
·
608d44d
1
Parent(s):
acd93cd
Squirro Docs added
Browse files
kg_builder/src/api_connections.py
CHANGED
@@ -29,8 +29,13 @@ def get_graph_connection(data_source_name):
|
|
29 |
url = os.getenv("TRAFFIC_NEO4J_URL")
|
30 |
username = os.getenv("TRAFFIC_NEO4J_USERNAME")
|
31 |
password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
|
|
|
|
|
|
|
|
|
32 |
else:
|
33 |
-
|
|
|
34 |
|
35 |
return Neo4jGraph(url=url, username=username, password=password)
|
36 |
|
@@ -87,6 +92,39 @@ def get_extraction_chain(
|
|
87 |
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
88 |
"""
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
elif data_source_name == "Traffic Law":
|
91 |
# Traffic Law-specific prompt
|
92 |
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
@@ -121,7 +159,8 @@ def get_extraction_chain(
|
|
121 |
"""
|
122 |
|
123 |
else:
|
124 |
-
|
|
|
125 |
|
126 |
logger.info(f"Prompt to extract graph data: {prompt_text}")
|
127 |
|
|
|
29 |
url = os.getenv("TRAFFIC_NEO4J_URL")
|
30 |
username = os.getenv("TRAFFIC_NEO4J_USERNAME")
|
31 |
password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
|
32 |
+
elif data_source_name == "SquirroDocs":
|
33 |
+
url = os.getenv("TRAFFIC_NEO4J_URL")
|
34 |
+
username = os.getenv("TRAFFIC_NEO4J_USERNAME")
|
35 |
+
password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
|
36 |
else:
|
37 |
+
errorMsg = f"No such Data Source connection configured: {data_source_name}"
|
38 |
+
raise ValueError(errorMsg)
|
39 |
|
40 |
return Neo4jGraph(url=url, username=username, password=password)
|
41 |
|
|
|
92 |
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
93 |
"""
|
94 |
|
95 |
+
elif data_source_name == "SquirroDocs":
|
96 |
+
# Squirro Docs-specific prompt
|
97 |
+
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
98 |
+
## 1. Overview
|
99 |
+
You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about Squirro documentation.
|
100 |
+
- **Nodes** symbolize entities such as types of traffic violations, penalties, driving regulations, and relevant legal statutes.
|
101 |
+
- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for legal professionals, law enforcement agencies, and the general public.
|
102 |
+
|
103 |
+
## 2. Labeling Nodes
|
104 |
+
- **Consistency**: Utilize uniform labels for node types to maintain clarity.
|
105 |
+
- For instance, consistently label violations as **"Violation"**, penalties as **"Penalty"**, and statutes as **"Statute"**.
|
106 |
+
- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
|
107 |
+
{'- **Allowed Node Labels:**' + ", ".join(['Violation', 'Penalty', 'Statute', 'VehicleType', 'LegalDocument']) if allowed_nodes else ""}
|
108 |
+
{'- **Allowed Relationship Types**:' + ", ".join(['Violates', 'Penalizes', 'Governs', 'Cites']) if allowed_rels else ""}
|
109 |
+
|
110 |
+
## 3. Handling Numerical Data and Dates
|
111 |
+
- Integrate numerical data and dates as attributes of the corresponding nodes.
|
112 |
+
- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
|
113 |
+
- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `fineAmount`, `lawEffectiveDate`.
|
114 |
+
|
115 |
+
## 4. Coreference Resolution
|
116 |
+
- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
|
117 |
+
- For example, if "Vehicle Code 22350" and "Speed Law" reference the same statute, uniformly apply "Vehicle Code 22350" as the node ID.
|
118 |
+
|
119 |
+
## 5. Relationship Naming Conventions
|
120 |
+
- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
|
121 |
+
- For instance, use "IS_PENALIZED_BY" instead of "ISPENALIZEDBY", use "IS_GOVERNED_BY" instead of "ISGOVERNEDBY" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
|
122 |
+
- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "REQUIRES" or "PROHIBITS" for legal requirements or prohibitions.
|
123 |
+
|
124 |
+
## 6. Strict Compliance
|
125 |
+
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
126 |
+
"""
|
127 |
+
|
128 |
elif data_source_name == "Traffic Law":
|
129 |
# Traffic Law-specific prompt
|
130 |
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
|
|
159 |
"""
|
160 |
|
161 |
else:
|
162 |
+
errorMsg = f"No prompt configured for Data Source ¨{data_source_name}¨!"
|
163 |
+
raise ValueError(errorMsg)
|
164 |
|
165 |
logger.info(f"Prompt to extract graph data: {prompt_text}")
|
166 |
|