File size: 25,200 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['OPENAI_API_KEY'] = \"\"\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(\n",
    "    stream=sys.stdout, level=logging.INFO\n",
    ")  # logging.DEBUG for more verbose output\n",
    "\n",
    "\n",
    "# define LLM\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from llama_index.core import Settings\n",
    "\n",
    "Settings.llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo-0125\")\n",
    "Settings.chunk_size = 512"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: langchain in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (0.1.16)\n",
      "Requirement already satisfied: neo4j in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (5.19.0)\n",
      "Requirement already satisfied: openai in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (1.23.2)\n",
      "Requirement already satisfied: wikipedia in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (1.4.0)\n",
      "Requirement already satisfied: tiktoken in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (0.6.0)\n",
      "Requirement already satisfied: langchain_openai in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (0.1.3)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (2.0.29)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (3.9.5)\n",
      "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (4.0.3)\n",
      "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.6.4)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (1.33)\n",
      "Requirement already satisfied: langchain-community<0.1,>=0.0.32 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.0.34)\n",
      "Requirement already satisfied: langchain-core<0.2.0,>=0.1.42 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.1.45)\n",
      "Requirement already satisfied: langchain-text-splitters<0.1,>=0.0.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.0.1)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.1.49)\n",
      "Requirement already satisfied: numpy<2,>=1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (1.26.4)\n",
      "Requirement already satisfied: pydantic<3,>=1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (2.7.0)\n",
      "Requirement already satisfied: requests<3,>=2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (2.31.0)\n",
      "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (8.2.3)\n",
      "Requirement already satisfied: pytz in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from neo4j) (2024.1)\n",
      "Requirement already satisfied: anyio<5,>=3.5.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (4.3.0)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (1.9.0)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (0.27.0)\n",
      "Requirement already satisfied: sniffio in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (1.3.1)\n",
      "Requirement already satisfied: tqdm>4 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (4.66.2)\n",
      "Requirement already satisfied: typing-extensions<5,>=4.7 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (4.11.0)\n",
      "Requirement already satisfied: beautifulsoup4 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from wikipedia) (4.12.3)\n",
      "Requirement already satisfied: regex>=2022.1.18 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from tiktoken) (2024.4.16)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n",
      "Requirement already satisfied: idna>=2.8 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
      "Requirement already satisfied: exceptiongroup>=1.0.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from anyio<5,>=3.5.0->openai) (1.2.1)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (3.21.1)\n",
      "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (0.9.0)\n",
      "Requirement already satisfied: certifi in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from httpx<1,>=0.23.0->openai) (2024.2.2)\n",
      "Requirement already satisfied: httpcore==1.* in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from httpx<1,>=0.23.0->openai) (1.0.5)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from jsonpatch<2.0,>=1.33->langchain) (2.4)\n",
      "Requirement already satisfied: packaging<24.0,>=23.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain-core<0.2.0,>=0.1.42->langchain) (23.2)\n",
      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.1)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from pydantic<3,>=1->langchain) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.18.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from pydantic<3,>=1->langchain) (2.18.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from requests<3,>=2->langchain) (3.3.2)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from requests<3,>=2->langchain) (2.2.1)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n",
      "Requirement already satisfied: soupsieve>1.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from beautifulsoup4->wikipedia) (2.5)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain) (1.0.0)\n"
     ]
    }
   ],
   "source": [
    "%pip install langchain neo4j openai wikipedia tiktoken langchain_openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.graphs import Neo4jGraph\n",
    "\n",
    "url = \"neo4j+s://2f409740.databases.neo4j.io\"\n",
    "username =\"neo4j\"\n",
    "password = \"\"\n",
    "graph = Neo4jGraph(\n",
    "    url=url,\n",
    "    username=username,\n",
    "    password=password\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.graphs.graph_document import (\n",
    "    Node as BaseNode,\n",
    "    Relationship as BaseRelationship,\n",
    "    GraphDocument,\n",
    ")\n",
    "from langchain.schema import Document\n",
    "from typing import List, Dict, Any, Optional\n",
    "from langchain.pydantic_v1 import Field, BaseModel\n",
    "\n",
    "class Property(BaseModel):\n",
    "  \"\"\"A single property consisting of key and value\"\"\"\n",
    "  key: str = Field(..., description=\"key\")\n",
    "  value: str = Field(..., description=\"value\")\n",
    "\n",
    "class Node(BaseNode):\n",
    "    properties: Optional[List[Property]] = Field(\n",
    "        None, description=\"List of node properties\")\n",
    "\n",
    "class Relationship(BaseRelationship):\n",
    "    properties: Optional[List[Property]] = Field(\n",
    "        None, description=\"List of relationship properties\"\n",
    "    )\n",
    "\n",
    "class KnowledgeGraph(BaseModel):\n",
    "    \"\"\"Generate a knowledge graph with entities and relationships.\"\"\"\n",
    "    nodes: List[Node] = Field(\n",
    "        ..., description=\"List of nodes in the knowledge graph\")\n",
    "    rels: List[Relationship] = Field(\n",
    "        ..., description=\"List of relationships in the knowledge graph\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_property_key(s: str) -> str:\n",
    "    words = s.split()\n",
    "    if not words:\n",
    "        return s\n",
    "    first_word = words[0].lower()\n",
    "    capitalized_words = [word.capitalize() for word in words[1:]]\n",
    "    return \"\".join([first_word] + capitalized_words)\n",
    "\n",
    "def props_to_dict(props) -> dict:\n",
    "    \"\"\"Convert properties to a dictionary.\"\"\"\n",
    "    properties = {}\n",
    "    if not props:\n",
    "      return properties\n",
    "    for p in props:\n",
    "        properties[format_property_key(p.key)] = p.value\n",
    "    return properties\n",
    "\n",
    "def map_to_base_node(node: Node) -> BaseNode:\n",
    "    \"\"\"Map the KnowledgeGraph Node to the base Node.\"\"\"\n",
    "    properties = props_to_dict(node.properties) if node.properties else {}\n",
    "    # Add name property for better Cypher statement generation\n",
    "    properties[\"name\"] = node.id.title()\n",
    "    return BaseNode(\n",
    "        id=node.id.title(), type=node.type.capitalize(), properties=properties\n",
    "    )\n",
    "\n",
    "\n",
    "def map_to_base_relationship(rel: Relationship) -> BaseRelationship:\n",
    "    \"\"\"Map the KnowledgeGraph Relationship to the base Relationship.\"\"\"\n",
    "    source = map_to_base_node(rel.source)\n",
    "    target = map_to_base_node(rel.target)\n",
    "    properties = props_to_dict(rel.properties) if rel.properties else {}\n",
    "    return BaseRelationship(\n",
    "        source=source, target=target, type=rel.type, properties=properties\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from langchain.chains.openai_functions import (\n",
    "    create_openai_fn_chain,\n",
    "    create_structured_output_chain,\n",
    ")\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\", temperature=0)\n",
    "\n",
    "def get_extraction_chain(\n",
    "    allowed_nodes: Optional[List[str]] = None,\n",
    "    allowed_rels: Optional[List[str]] = None\n",
    "    ):\n",
    "    prompt = ChatPromptTemplate.from_messages(\n",
    "        [(\n",
    "          \"system\",\n",
    "        f\"\"\"# Knowledge Graph Instructions for GPT-4\n",
    "## 1. Overview\n",
    "You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about chemotherapy and related cancer treatments.\n",
    "- **Nodes** symbolize entities such as medical conditions, drugs, symptoms, treatments, and associated medical concepts.\n",
    "- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for medical practitioners and scholarly research.\n",
    "\n",
    "## 2. Labeling Nodes\n",
    "- **Consistency**: Utilize uniform labels for node types to maintain clarity.\n",
    "  - For instance, consistently label drugs as **\"Drug\"**, symptoms as **\"Symptom\"**, and treatments as **\"Treatment\"**.\n",
    "- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.\n",
    "\n",
    "{'- **Allowed Node Labels:**' + \", \".join(['Drug', 'Symptom', 'Treatment', 'MedicalCondition', 'ResearchStudy']) if allowed_nodes else \"\"}\n",
    "{'- **Allowed Relationship Types**:' + \", \".join(['Treats', 'Causes', 'Researches', 'Recommends']) if allowed_rels else \"\"}\n",
    "\n",
    "## 3. Handling Numerical Data and Dates\n",
    "- Integrate numerical data and dates as attributes of the corresponding nodes.\n",
    "- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.\n",
    "- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `approvedYear`, `dosageAmount`.\n",
    "\n",
    "## 4. Coreference Resolution\n",
    "- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.\n",
    "  - For example, if \"Methotrexate\" and \"MTX\" reference the same medication, uniformly apply \"Methotrexate\" as the node ID.\n",
    "\n",
    "## 5. Relationship Naming Conventions\n",
    "- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.\n",
    "  - For instance, use \"HAS_SIDE_EFFECT\" instead of \"HASSIDEEFFECT\", use \"CAN_RESULT_FROM\" instead of \"CANRESULTFROM\" etc. You keep making the same mistakes of storing the relationships without the \"_\" in between the words. Any further similar errors will lead to termination.\n",
    "- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as \"INHIBITS\" or \"ACTIVATES\" for interactions between substances.\n",
    "\n",
    "## 6. Strict Compliance\n",
    "Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.\n",
    "        \"\"\"),\n",
    "            (\"human\", \"Use the given format to extract information from the following input: {input}\"),\n",
    "            (\"human\", \"Tip: Precision in the node and relationship creation is vital for the integrity of the knowledge graph.\"),\n",
    "        ])\n",
    "    return create_structured_output_chain(KnowledgeGraph, llm, prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_and_store_graph(\n",
    "    document: Document,\n",
    "    nodes:Optional[List[str]] = None,\n",
    "    rels:Optional[List[str]]=None) -> None:\n",
    "    # Extract graph data using OpenAI functions\n",
    "    extract_chain = get_extraction_chain(nodes, rels)\n",
    "    data = extract_chain.invoke(document.page_content)['function']\n",
    "    # Construct a graph document\n",
    "    graph_document = GraphDocument(\n",
    "      nodes = [map_to_base_node(node) for node in data.nodes],\n",
    "      relationships = [map_to_base_relationship(rel) for rel in data.rels],\n",
    "      source = document\n",
    "    )\n",
    "    # Store information into a graph\n",
    "    graph.add_graph_documents([graph_document])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders import WikipediaLoader\n",
    "from langchain.text_splitter import TokenTextSplitter\n",
    "\n",
    "# Read the wikipedia article\n",
    "raw_documents = WikipediaLoader(query=\"Chemotherapy\").load()\n",
    "# Define chunking strategy\n",
    "text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)\n",
    "\n",
    "# Only take the first the raw_documents\n",
    "documents = text_splitter.split_documents(raw_documents[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/5 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/5 [01:25<?, ?it/s]\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "'KnowledgeGraph' object is not subscriptable",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[14], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtqdm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tqdm\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, d \u001b[38;5;129;01min\u001b[39;00m tqdm(\u001b[38;5;28menumerate\u001b[39m(documents), total\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlen\u001b[39m(documents)):\n\u001b[0;32m----> 4\u001b[0m     \u001b[43mextract_and_store_graph\u001b[49m\u001b[43m(\u001b[49m\u001b[43md\u001b[49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[12], line 7\u001b[0m, in \u001b[0;36mextract_and_store_graph\u001b[0;34m(document, nodes, rels)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mextract_and_store_graph\u001b[39m(\n\u001b[1;32m      2\u001b[0m     document: Document,\n\u001b[1;32m      3\u001b[0m     nodes:Optional[List[\u001b[38;5;28mstr\u001b[39m]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m      4\u001b[0m     rels:Optional[List[\u001b[38;5;28mstr\u001b[39m]]\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m      5\u001b[0m     \u001b[38;5;66;03m# Extract graph data using OpenAI functions\u001b[39;00m\n\u001b[1;32m      6\u001b[0m     extract_chain \u001b[38;5;241m=\u001b[39m get_extraction_chain(nodes, rels)\n\u001b[0;32m----> 7\u001b[0m     data \u001b[38;5;241m=\u001b[39m \u001b[43mextract_chain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdocument\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpage_content\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfunction\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m      8\u001b[0m     \u001b[38;5;66;03m# Construct a graph document\u001b[39;00m\n\u001b[1;32m      9\u001b[0m     graph_document \u001b[38;5;241m=\u001b[39m GraphDocument(\n\u001b[1;32m     10\u001b[0m       nodes \u001b[38;5;241m=\u001b[39m [map_to_base_node(node) \u001b[38;5;28;01mfor\u001b[39;00m node \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mnodes],\n\u001b[1;32m     11\u001b[0m       relationships \u001b[38;5;241m=\u001b[39m [map_to_base_relationship(rel) \u001b[38;5;28;01mfor\u001b[39;00m rel \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mrels],\n\u001b[1;32m     12\u001b[0m       source \u001b[38;5;241m=\u001b[39m document\n\u001b[1;32m     13\u001b[0m     )\n",
      "\u001b[0;31mTypeError\u001b[0m: 'KnowledgeGraph' object is not subscriptable"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "for i, d in tqdm(enumerate(documents), total=len(documents)):\n",
    "    extract_and_store_graph(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Query the knowledge graph in a RAG application\n",
    "from langchain.chains import GraphCypherQAChain\n",
    "\n",
    "graph.refresh_schema()\n",
    "\n",
    "cypher_chain = GraphCypherQAChain.from_llm(\n",
    "    graph=graph,\n",
    "    cypher_llm=ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
    "    qa_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
    "    #validate_cypher=True, # Validate relationship directions\n",
    "    verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
      "Generated Cypher:\n",
      "\u001b[32;1m\u001b[1;3mMATCH (t:Treatment {name: \"Induction Chemotherapy\"})-[:CONTROLS]->(mc) RETURN mc.name\u001b[0m\n",
      "Full Context:\n",
      "\u001b[32;1m\u001b[1;3m[{'mc.name': 'Malignant Lymphomas'}, {'mc.name': 'Head And Neck Squamous Cell Carcinomas'}, {'mc.name': 'Malignant Lymphomas'}, {'mc.name': 'Head And Neck Squamous Cell Carcinomas'}]\u001b[0m\n",
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'query': 'What does Induction Chemotherapy control?',\n",
       " 'result': 'Induction Chemotherapy controls Malignant Lymphomas and Head And Neck Squamous Cell Carcinomas.'}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cypher_chain.invoke({\"query\": \"What does Induction Chemotherapy control?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "my_project_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}