Prakhar Bhandari
committed on
Commit
·
0beb8e1
1
Parent(s):
b77d203
updated prompt and function for traffic law wiki
Browse files
- kg_builder/src/__pycache__/api_connections.cpython-39.pyc +0 -0
- kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc +0 -0
- kg_builder/src/__pycache__/models.cpython-39.pyc +0 -0
- kg_builder/src/__pycache__/utils.cpython-39.pyc +0 -0
- kg_builder/src/api_connections.py +62 -2
- kg_builder/src/knowledge_graph_builder.py +1 -1
kg_builder/src/__pycache__/api_connections.cpython-39.pyc
CHANGED
|
Binary files a/kg_builder/src/__pycache__/api_connections.cpython-39.pyc and b/kg_builder/src/__pycache__/api_connections.cpython-39.pyc differ
|
|
|
kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc
CHANGED
|
Binary files a/kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc and b/kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc differ
|
|
|
kg_builder/src/__pycache__/models.cpython-39.pyc
CHANGED
|
Binary files a/kg_builder/src/__pycache__/models.cpython-39.pyc and b/kg_builder/src/__pycache__/models.cpython-39.pyc differ
|
|
|
kg_builder/src/__pycache__/utils.cpython-39.pyc
CHANGED
|
Binary files a/kg_builder/src/__pycache__/utils.cpython-39.pyc and b/kg_builder/src/__pycache__/utils.cpython-39.pyc differ
|
|
|
kg_builder/src/api_connections.py
CHANGED
|
@@ -40,10 +40,70 @@ def get_extraction_chain(
|
|
| 40 |
):
|
| 41 |
if category == "Chemotherapy":
|
| 42 |
# Chemotherapy-specific prompt
|
| 43 |
-
prompt_text = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
elif category == "Traffic Law":
|
| 45 |
# Traffic Law-specific prompt
|
| 46 |
-
prompt_text = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
else:
|
| 48 |
raise ValueError("Unknown category")
|
| 49 |
|
|
|
|
| 40 |
):
|
| 41 |
if category == "Chemotherapy":
|
| 42 |
# Chemotherapy-specific prompt
|
| 43 |
+
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
| 44 |
+
## 1. Overview
|
| 45 |
+
You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about chemotherapy and related cancer treatments.
|
| 46 |
+
- **Nodes** symbolize entities such as medical conditions, drugs, symptoms, treatments, and associated medical concepts.
|
| 47 |
+
- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for medical practitioners and scholarly research.
|
| 48 |
+
|
| 49 |
+
## 2. Labeling Nodes
|
| 50 |
+
- **Consistency**: Utilize uniform labels for node types to maintain clarity.
|
| 51 |
+
- For instance, consistently label drugs as **"Drug"**, symptoms as **"Symptom"**, and treatments as **"Treatment"**.
|
| 52 |
+
- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
|
| 53 |
+
{'- **Allowed Node Labels:**' + ", ".join(['Drug', 'Symptom', 'Treatment', 'MedicalCondition', 'ResearchStudy']) if allowed_nodes else ""}
|
| 54 |
+
{'- **Allowed Relationship Types**:' + ", ".join(['Treats', 'Causes', 'Researches', 'Recommends']) if allowed_rels else ""}
|
| 55 |
+
|
| 56 |
+
## 3. Handling Numerical Data and Dates
|
| 57 |
+
- Integrate numerical data and dates as attributes of the corresponding nodes.
|
| 58 |
+
- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
|
| 59 |
+
- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `approvedYear`, `dosageAmount`.
|
| 60 |
+
|
| 61 |
+
## 4. Coreference Resolution
|
| 62 |
+
- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
|
| 63 |
+
- For example, if "Methotrexate" and "MTX" reference the same medication, uniformly apply "Methotrexate" as the node ID.
|
| 64 |
+
|
| 65 |
+
## 5. Relationship Naming Conventions
|
| 66 |
+
- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
|
| 67 |
+
- For instance, use "HAS_SIDE_EFFECT" instead of "HASSIDEEFFECT", use "CAN_RESULT_FROM" instead of "CANRESULTFROM" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
|
| 68 |
+
- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "INHIBITS" or "ACTIVATES" for interactions between substances.
|
| 69 |
+
|
| 70 |
+
## 6. Strict Compliance
|
| 71 |
+
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
| 72 |
+
"""
|
| 73 |
+
|
| 74 |
elif category == "Traffic Law":
|
| 75 |
# Traffic Law-specific prompt
|
| 76 |
+
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
| 77 |
+
## 1. Overview
|
| 78 |
+
You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about traffic laws and regulations in the United States.
|
| 79 |
+
- **Nodes** symbolize entities such as types of traffic violations, penalties, driving regulations, and relevant legal statutes.
|
| 80 |
+
- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for legal professionals, law enforcement agencies, and the general public.
|
| 81 |
+
|
| 82 |
+
## 2. Labeling Nodes
|
| 83 |
+
- **Consistency**: Utilize uniform labels for node types to maintain clarity.
|
| 84 |
+
- For instance, consistently label violations as **"Violation"**, penalties as **"Penalty"**, and statutes as **"Statute"**.
|
| 85 |
+
- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
|
| 86 |
+
{'- **Allowed Node Labels:**' + ", ".join(['Violation', 'Penalty', 'Statute', 'VehicleType', 'LegalDocument']) if allowed_nodes else ""}
|
| 87 |
+
{'- **Allowed Relationship Types**:' + ", ".join(['Violates', 'Penalizes', 'Governs', 'Cites']) if allowed_rels else ""}
|
| 88 |
+
|
| 89 |
+
## 3. Handling Numerical Data and Dates
|
| 90 |
+
- Integrate numerical data and dates as attributes of the corresponding nodes.
|
| 91 |
+
- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
|
| 92 |
+
- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `fineAmount`, `lawEffectiveDate`.
|
| 93 |
+
|
| 94 |
+
## 4. Coreference Resolution
|
| 95 |
+
- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
|
| 96 |
+
- For example, if "Vehicle Code 22350" and "Speed Law" reference the same statute, uniformly apply "Vehicle Code 22350" as the node ID.
|
| 97 |
+
|
| 98 |
+
## 5. Relationship Naming Conventions
|
| 99 |
+
- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
|
| 100 |
+
- For instance, use "IS_PENALIZED_BY" instead of "ISPENALIZEDBY", use "IS_GOVERNED_BY" instead of "ISGOVERNEDBY" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
|
| 101 |
+
- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "REQUIRES" or "PROHIBITS" for legal requirements or prohibitions.
|
| 102 |
+
|
| 103 |
+
## 6. Strict Compliance
|
| 104 |
+
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
| 105 |
+
"""
|
| 106 |
+
|
| 107 |
else:
|
| 108 |
raise ValueError("Unknown category")
|
| 109 |
|
kg_builder/src/knowledge_graph_builder.py
CHANGED
|
@@ -28,7 +28,7 @@ def extract_and_store_graph(
|
|
| 28 |
|
| 29 |
graph = get_graph_connection(category)
|
| 30 |
# Extract graph data using OpenAI functions
|
| 31 |
-
extract_chain = get_extraction_chain(nodes, rels)
|
| 32 |
data = extract_chain.invoke(document.page_content)['function']
|
| 33 |
# Construct a graph document
|
| 34 |
graph_document = GraphDocument(
|
|
|
|
| 28 |
|
| 29 |
graph = get_graph_connection(category)
|
| 30 |
# Extract graph data using OpenAI functions
|
| 31 |
+
extract_chain = get_extraction_chain(category, nodes, rels)
|
| 32 |
data = extract_chain.invoke(document.page_content)['function']
|
| 33 |
# Construct a graph document
|
| 34 |
graph_document = GraphDocument(
|