{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DepMap dependency scores for Tahoe-100M drug / cell-line pairs\n",
    "\n",
    "Builds a table keyed by `<drug>_<cell line>` whose value is the DepMap CRISPR gene-dependency score of the drug's annotated target in that cell line, and writes it to `data_for_classifier/depmap_dependency_scores.tsv`.\n",
    "\n",
    "Inputs:\n",
    "- the `drug_metadata` and `cell_line_metadata` configurations of `tahoebio/Tahoe-100M`, loaded with `datasets.load_dataset`;\n",
    "- `../CRISPRGeneDependency.csv`, read with its first column as the row index."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from datasets import load_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might need to repoint lives here.\n",
    "TAHOE_REPO = \"tahoebio/Tahoe-100M\"\n",
    "DEPMAP_CSV = Path(\"../CRISPRGeneDependency.csv\")\n",
    "OUTPUT_TSV = Path(\"data_for_classifier/depmap_dependency_scores.tsv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_metadata_raw = pd.DataFrame(load_dataset(TAHOE_REPO, \"drug_metadata\", split=\"train\"))\n",
    "cell_metadata = pd.DataFrame(load_dataset(TAHOE_REPO, \"cell_line_metadata\", split=\"train\"))\n",
    "depmap_raw = pd.read_csv(DEPMAP_CSV, index_col=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Drug / target table\n",
    "\n",
    "Drugs without a `targets` annotation are dropped. The pairs are kept as a table rather than a `target -> drug` dictionary: a dictionary can hold only one drug per target, so drugs that share a target would silently overwrite each other.\n",
    "\n",
    "**TODO (confirm):** a `targets` value is matched to a DepMap column label by exact string equality. If `targets` can list several genes in one string, those drugs will not match any column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_targets = (\n",
    "    drug_metadata_raw\n",
    "    .loc[drug_metadata_raw[\"targets\"].notna(), [\"drug\", \"targets\"]]\n",
    "    .drop_duplicates()\n",
    "    .rename(columns={\"targets\": \"gene\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Map DepMap rows to Tahoe cell lines\n",
    "\n",
    "Row labels are translated from `Cell_ID_DepMap` to `Cell_ID_Cellosaur`; rows with no match in the Tahoe cell-line metadata are dropped. Column labels are cut at the first space (presumably they look like `SYMBOL (ID)` - verify against the CSV header)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "depmap_to_tahoe = dict(zip(cell_metadata[\"Cell_ID_DepMap\"], cell_metadata[\"Cell_ID_Cellosaur\"]))\n",
    "\n",
    "# Built from depmap_raw every time, so re-running this cell gives the same result.\n",
    "depmap_wide = (\n",
    "    depmap_raw\n",
    "    .set_axis(depmap_raw.index.map(depmap_to_tahoe), axis=\"index\")\n",
    "    .rename(columns=lambda label: label.split(\" \")[0])\n",
    "    # Name both axes explicitly instead of relying on the default \"index\" column.\n",
    "    .rename_axis(index=\"cell_line\", columns=\"gene\")\n",
    "    .loc[lambda frame: frame.index.notna()]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reshape to long format and attach drugs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "depmap_long = (\n",
    "    depmap_wide\n",
    "    .reset_index()\n",
    "    .melt(id_vars=\"cell_line\", var_name=\"gene\", value_name=\"depmap_dependency_score\")\n",
    ")\n",
    "\n",
    "# Inner join: keeps only genes that are some drug's target, and yields one row per\n",
    "# (drug, cell line) even when several drugs share the same target.\n",
    "drug_scores = depmap_long.merge(drug_targets, on=\"gene\", how=\"inner\")\n",
    "drug_scores[\"drug_cellline\"] = drug_scores[\"drug\"] + \"_\" + drug_scores[\"cell_line\"]\n",
    "\n",
    "assert drug_scores[\"drug_cellline\"].is_unique, \"duplicate drug_cellline keys in output\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# to_csv does not create missing directories.\n",
    "OUTPUT_TSV.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "(\n",
    "    drug_scores\n",
    "    .set_index(\"drug_cellline\")[[\"depmap_dependency_score\"]]\n",
    "    .to_csv(OUTPUT_TSV, index=True, sep=\"\\t\")\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}