{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Genetic-association labels for drug-organ pairs\n",
    "\n",
    "Builds `assoc_drug_organ_label.tsv`. Each row is a `drug_organ` key (`<drug>_<organ>`) such that\n",
    "\n",
    "- a row of `assoc.tsv.gz` links a gene to a MeSH term,\n",
    "- that MeSH term is mapped to an organ in `indication_to_organ_mapping_confident.tsv`, and\n",
    "- the gene equals the `targets` value of a drug in the Tahoe-100M `drug_metadata` table.\n",
    "\n",
    "Every row carries `is_associated_with_indication = 1`.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel -> Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from datasets import load_dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project checkout that holds the indication -> organ mapping and receives the label file.\n",
    "# The default is the original location; set CLIN_ORACLE_DIR to run on another machine.\n",
    "PROJECT_DIR = Path(os.environ.get(\"CLIN_ORACLE_DIR\", \"/home/ubuntu/tahoe/clin-oracle-tahoe-hack-2025\"))\n",
    "\n",
    "# Directory with the association table. The default is relative to the kernel's working directory.\n",
    "GENETIC_SUPPORT_DIR = Path(os.environ.get(\"GENETIC_SUPPORT_DIR\", \"genetic_support/data\"))\n",
    "\n",
    "INDICATION_TO_ORGAN_PATH = PROJECT_DIR / \"indication_to_organ_mapping_confident.tsv\"\n",
    "ASSOC_PATH = GENETIC_SUPPORT_DIR / \"assoc.tsv.gz\"\n",
    "LABEL_OUTPUT_PATH = PROJECT_DIR / \"data_for_classifier\" / \"assoc_drug_organ_label.tsv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load drug targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_metadata = load_dataset(\"tahoebio/Tahoe-100M\", \"drug_metadata\", split=\"train\").to_pandas()\n",
    "\n",
    "# One row per distinct (gene, drug) pair. A table is used instead of a {target: drug} dict\n",
    "# because several drugs can share the same `targets` value and a dict keeps only one of them.\n",
    "# NOTE(review): `targets` is matched to `gene` as an exact string, as before - confirm that\n",
    "# the column never packs several gene symbols into one value.\n",
    "target_drug_pairs = (\n",
    "    drug_metadata.loc[drug_metadata[\"targets\"].notna(), [\"targets\", \"drug\"]]\n",
    "    .rename(columns={\"targets\": \"gene\"})\n",
    "    .drop_duplicates()\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "\n",
    "print(f\"{target_drug_pairs['gene'].nunique()} distinct targets, {len(target_drug_pairs)} (gene, drug) pairs\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load associations and the indication -> organ mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "indication_to_organ = pd.read_csv(INDICATION_TO_ORGAN_PATH, sep=\"\\t\")\n",
    "assoc_raw = pd.read_csv(ASSOC_PATH, sep=\"\\t\")\n",
    "\n",
    "assoc_raw.shape, indication_to_organ.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Join associations with organs and drugs\n",
    "\n",
    "Both joins are inner joins: an association is kept only if its MeSH term has an organ mapping and its gene is the target of at least one drug. The inputs are never overwritten, so this cell can be re-run safely."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assoc_labeled = (\n",
    "    assoc_raw\n",
    "    .merge(indication_to_organ, left_on=\"mesh_term\", right_on=\"indication_mesh_term\")\n",
    "    .merge(target_drug_pairs, on=\"gene\", how=\"inner\")\n",
    "    .assign(\n",
    "        drug_organ=lambda frame: frame[\"drug\"] + \"_\" + frame[\"organ\"],\n",
    "        is_associated_with_indication=1,\n",
    "    )\n",
    "    # A missing drug or organ makes the concatenated key NaN; such rows cannot be labelled.\n",
    "    .dropna(subset=[\"drug_organ\"])\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "\n",
    "assert not assoc_labeled.empty, \"no association matched both an organ and a drug target\"\n",
    "assoc_labeled.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write the label file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Many associations collapse onto the same drug_organ key and the label is constant,\n",
    "# so repeated rows carry no information and would multiply rows in a downstream join.\n",
    "labels = (\n",
    "    assoc_labeled[[\"drug_organ\", \"is_associated_with_indication\"]]\n",
    "    .drop_duplicates()\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "assert labels[\"drug_organ\"].is_unique\n",
    "\n",
    "LABEL_OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)\n",
    "labels.to_csv(LABEL_OUTPUT_PATH, sep=\"\\t\", index=False)\n",
    "\n",
    "print(f\"Wrote {len(labels)} drug_organ labels to {LABEL_OUTPUT_PATH}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}