| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603 | 
							- {
 -  "cells": [
 -   {
 -    "cell_type": "code",
 -    "execution_count": 12,
 -    "id": "9f0d2ac5",
 -    "metadata": {},
 -    "outputs": [],
 -    "source": [
 -     "import pandas as pd\n",
 -     "import numpy as np\n",
 -     "from tqdm.notebook import tqdm"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 2,
 -    "id": "e0c2c306",
 -    "metadata": {},
 -    "outputs": [
 -     {
 -      "name": "stderr",
 -      "output_type": "stream",
 -      "text": [
 -       "C:\\Users\\Notebook\\anaconda3\\lib\\site-packages\\numpy\\lib\\arraysetops.py:583: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
 -       "  mask |= (ar1 == a)\n"
 -      ]
 -     },
 -     {
 -      "data": {
 -       "text/html": [
 -        "<div>\n",
 -        "<style scoped>\n",
 -        "    .dataframe tbody tr th:only-of-type {\n",
 -        "        vertical-align: middle;\n",
 -        "    }\n",
 -        "\n",
 -        "    .dataframe tbody tr th {\n",
 -        "        vertical-align: top;\n",
 -        "    }\n",
 -        "\n",
 -        "    .dataframe thead th {\n",
 -        "        text-align: right;\n",
 -        "    }\n",
 -        "</style>\n",
 -        "<table border=\"1\" class=\"dataframe\">\n",
 -        "  <thead>\n",
 -        "    <tr style=\"text-align: right;\">\n",
 -        "      <th></th>\n",
 -        "      <th>drug id</th>\n",
 -        "      <th>drug name</th>\n",
 -        "      <th>interaction drug id</th>\n",
 -        "      <th>interaction drug name</th>\n",
 -        "      <th>description</th>\n",
 -        "    </tr>\n",
 -        "  </thead>\n",
 -        "  <tbody>\n",
 -        "    <tr>\n",
 -        "      <th>0</th>\n",
 -        "      <td>DB00001</td>\n",
 -        "      <td>Lepirudin</td>\n",
 -        "      <td>DB06605</td>\n",
 -        "      <td>Apixaban</td>\n",
 -        "      <td>Apixaban may increase the anticoagulant activi...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>1</th>\n",
 -        "      <td>DB00001</td>\n",
 -        "      <td>Lepirudin</td>\n",
 -        "      <td>DB06695</td>\n",
 -        "      <td>Dabigatran etexilate</td>\n",
 -        "      <td>Dabigatran etexilate may increase the anticoag...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2</th>\n",
 -        "      <td>DB00001</td>\n",
 -        "      <td>Lepirudin</td>\n",
 -        "      <td>DB01254</td>\n",
 -        "      <td>Dasatinib</td>\n",
 -        "      <td>The risk or severity of bleeding and hemorrhag...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>3</th>\n",
 -        "      <td>DB00001</td>\n",
 -        "      <td>Lepirudin</td>\n",
 -        "      <td>DB01609</td>\n",
 -        "      <td>Deferasirox</td>\n",
 -        "      <td>The risk or severity of gastrointestinal bleed...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>4</th>\n",
 -        "      <td>DB00001</td>\n",
 -        "      <td>Lepirudin</td>\n",
 -        "      <td>DB01586</td>\n",
 -        "      <td>Ursodeoxycholic acid</td>\n",
 -        "      <td>The risk or severity of bleeding and bruising ...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>...</th>\n",
 -        "      <td>...</td>\n",
 -        "      <td>...</td>\n",
 -        "      <td>...</td>\n",
 -        "      <td>...</td>\n",
 -        "      <td>...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2768186</th>\n",
 -        "      <td>DB16746</td>\n",
 -        "      <td>Elivaldogene autotemcel</td>\n",
 -        "      <td>DB01264</td>\n",
 -        "      <td>Darunavir</td>\n",
 -        "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2768187</th>\n",
 -        "      <td>DB16746</td>\n",
 -        "      <td>Elivaldogene autotemcel</td>\n",
 -        "      <td>DB01319</td>\n",
 -        "      <td>Fosamprenavir</td>\n",
 -        "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2768188</th>\n",
 -        "      <td>DB16746</td>\n",
 -        "      <td>Elivaldogene autotemcel</td>\n",
 -        "      <td>DB01601</td>\n",
 -        "      <td>Lopinavir</td>\n",
 -        "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2768189</th>\n",
 -        "      <td>DB16746</td>\n",
 -        "      <td>Elivaldogene autotemcel</td>\n",
 -        "      <td>DB11586</td>\n",
 -        "      <td>Asunaprevir</td>\n",
 -        "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2768190</th>\n",
 -        "      <td>DB16746</td>\n",
 -        "      <td>Elivaldogene autotemcel</td>\n",
 -        "      <td>DB15623</td>\n",
 -        "      <td>TMC-310911</td>\n",
 -        "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
 -        "    </tr>\n",
 -        "  </tbody>\n",
 -        "</table>\n",
 -        "<p>2768191 rows × 5 columns</p>\n",
 -        "</div>"
 -       ],
 -       "text/plain": [
 -        "         drug id                drug name interaction drug id  \\\n",
 -        "0        DB00001                Lepirudin             DB06605   \n",
 -        "1        DB00001                Lepirudin             DB06695   \n",
 -        "2        DB00001                Lepirudin             DB01254   \n",
 -        "3        DB00001                Lepirudin             DB01609   \n",
 -        "4        DB00001                Lepirudin             DB01586   \n",
 -        "...          ...                      ...                 ...   \n",
 -        "2768186  DB16746  Elivaldogene autotemcel             DB01264   \n",
 -        "2768187  DB16746  Elivaldogene autotemcel             DB01319   \n",
 -        "2768188  DB16746  Elivaldogene autotemcel             DB01601   \n",
 -        "2768189  DB16746  Elivaldogene autotemcel             DB11586   \n",
 -        "2768190  DB16746  Elivaldogene autotemcel             DB15623   \n",
 -        "\n",
 -        "        interaction drug name  \\\n",
 -        "0                    Apixaban   \n",
 -        "1        Dabigatran etexilate   \n",
 -        "2                   Dasatinib   \n",
 -        "3                 Deferasirox   \n",
 -        "4        Ursodeoxycholic acid   \n",
 -        "...                       ...   \n",
 -        "2768186             Darunavir   \n",
 -        "2768187         Fosamprenavir   \n",
 -        "2768188             Lopinavir   \n",
 -        "2768189           Asunaprevir   \n",
 -        "2768190            TMC-310911   \n",
 -        "\n",
 -        "                                               description  \n",
 -        "0        Apixaban may increase the anticoagulant activi...  \n",
 -        "1        Dabigatran etexilate may increase the anticoag...  \n",
 -        "2        The risk or severity of bleeding and hemorrhag...  \n",
 -        "3        The risk or severity of gastrointestinal bleed...  \n",
 -        "4        The risk or severity of bleeding and bruising ...  \n",
 -        "...                                                    ...  \n",
 -        "2768186  The therapeutic efficacy of Elivaldogene autot...  \n",
 -        "2768187  The therapeutic efficacy of Elivaldogene autot...  \n",
 -        "2768188  The therapeutic efficacy of Elivaldogene autot...  \n",
 -        "2768189  The therapeutic efficacy of Elivaldogene autot...  \n",
 -        "2768190  The therapeutic efficacy of Elivaldogene autot...  \n",
 -        "\n",
 -        "[2768191 rows x 5 columns]"
 -       ]
 -      },
 -      "execution_count": 2,
 -      "metadata": {},
 -      "output_type": "execute_result"
 -     }
 -    ],
 -    "source": [
 -     "# Load the tab-separated DrugBank interaction table, using its first column as the index.\n",
 -     "ddi = pd.read_table('raw/Drugbank_drug_interactions.tsv', index_col=0)\n",
 -     "ddi"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 3,
 -    "id": "35d57012",
 -    "metadata": {},
 -    "outputs": [
 -     {
 -      "data": {
 -       "text/plain": [
 -        "array(['DB00001', 'DB00002', 'DB00004', ..., 'DB09047', 'DB11074',\n",
 -        "       'DB00878'], dtype=object)"
 -       ]
 -      },
 -      "execution_count": 3,
 -      "metadata": {},
 -      "output_type": "execute_result"
 -     }
 -    ],
 -    "source": [
 -     "# Gather every drug id that occurs on either side of an interaction.\n",
 -     "drug1s, drug2s = ddi['drug id'], ddi['interaction drug id']\n",
 -     "drugs = (\n",
 -     "    pd.concat([drug1s, drug2s], axis=0)  # stack both id columns end to end\n",
 -     "    .unique()  # keep one entry per distinct id\n",
 -     ")\n",
 -     "drugs"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 4,
 -    "id": "79575691",
 -    "metadata": {},
 -    "outputs": [
 -     {
 -      "name": "stdout",
 -      "output_type": "stream",
 -      "text": [
 -       "4417\n",
 -       "4418\n"
 -      ]
 -     },
 -     {
 -      "data": {
 -       "text/plain": [
 -        "5915"
 -       ]
 -      },
 -      "execution_count": 4,
 -      "metadata": {},
 -      "output_type": "execute_result"
 -     }
 -    ],
 -    "source": [
 -     "# Distinct ids per column, then the size of the combined id set.\n",
 -     "first_side_ids = drug1s.unique()\n",
 -     "second_side_ids = drug2s.unique()\n",
 -     "print(len(first_side_ids))\n",
 -     "print(len(second_side_ids))\n",
 -     "len(drugs)"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 10,
 -    "id": "e82113d7",
 -    "metadata": {},
 -    "outputs": [
 -     {
 -      "name": "stderr",
 -      "output_type": "stream",
 -      "text": [
 -       "5915it [00:00, 2353154.53it/s]\n"
 -      ]
 -     },
 -     {
 -      "data": {
 -       "text/html": [
 -        "<div>\n",
 -        "<style scoped>\n",
 -        "    .dataframe tbody tr th:only-of-type {\n",
 -        "        vertical-align: middle;\n",
 -        "    }\n",
 -        "\n",
 -        "    .dataframe tbody tr th {\n",
 -        "        vertical-align: top;\n",
 -        "    }\n",
 -        "\n",
 -        "    .dataframe thead th {\n",
 -        "        text-align: right;\n",
 -        "    }\n",
 -        "</style>\n",
 -        "<table border=\"1\" class=\"dataframe\">\n",
 -        "  <thead>\n",
 -        "    <tr style=\"text-align: right;\">\n",
 -        "      <th></th>\n",
 -        "      <th>DrugBank_id</th>\n",
 -        "      <th>node_index</th>\n",
 -        "    </tr>\n",
 -        "  </thead>\n",
 -        "  <tbody>\n",
 -        "    <tr>\n",
 -        "      <th>0</th>\n",
 -        "      <td>DB00001</td>\n",
 -        "      <td>0</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>1</th>\n",
 -        "      <td>DB00002</td>\n",
 -        "      <td>1</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2</th>\n",
 -        "      <td>DB00004</td>\n",
 -        "      <td>2</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>3</th>\n",
 -        "      <td>DB00005</td>\n",
 -        "      <td>3</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>4</th>\n",
 -        "      <td>DB00006</td>\n",
 -        "      <td>4</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>...</th>\n",
 -        "      <td>...</td>\n",
 -        "      <td>...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5910</th>\n",
 -        "      <td>DB12264</td>\n",
 -        "      <td>5910</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5911</th>\n",
 -        "      <td>DB06614</td>\n",
 -        "      <td>5911</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5912</th>\n",
 -        "      <td>DB09047</td>\n",
 -        "      <td>5912</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5913</th>\n",
 -        "      <td>DB11074</td>\n",
 -        "      <td>5913</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5914</th>\n",
 -        "      <td>DB00878</td>\n",
 -        "      <td>5914</td>\n",
 -        "    </tr>\n",
 -        "  </tbody>\n",
 -        "</table>\n",
 -        "<p>5915 rows × 2 columns</p>\n",
 -        "</div>"
 -       ],
 -       "text/plain": [
 -        "     DrugBank_id node_index\n",
 -        "0        DB00001          0\n",
 -        "1        DB00002          1\n",
 -        "2        DB00004          2\n",
 -        "3        DB00005          3\n",
 -        "4        DB00006          4\n",
 -        "...          ...        ...\n",
 -        "5910     DB12264       5910\n",
 -        "5911     DB06614       5911\n",
 -        "5912     DB09047       5912\n",
 -        "5913     DB11074       5913\n",
 -        "5914     DB00878       5914\n",
 -        "\n",
 -        "[5915 rows x 2 columns]"
 -       ]
 -      },
 -      "execution_count": 10,
 -      "metadata": {},
 -      "output_type": "execute_result"
 -     }
 -    ],
 -    "source": [
 -     "# assign node index to drugs: each id gets its position in `drugs`.\n",
 -     "# The frame is built column-wise so node_index stays an integer column; packing\n",
 -     "# [id, index] pairs into one NumPy array would coerce the indexes to strings.\n",
 -     "drug_id_df = pd.DataFrame({'DrugBank_id': drugs, 'node_index': np.arange(len(drugs))})\n",
 -     "drug_id_df"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 6,
 -    "id": "466fa3f4",
 -    "metadata": {},
 -    "outputs": [],
 -    "source": [
 -     "# save drug2id file\n",
 -     "drug_id_df.to_csv(\n",
 -     "    'raw/drug2id.tsv',\n",
 -     "    sep='\\t',\n",
 -     "    index=False,\n",
 -     ")"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 16,
 -    "id": "b7a7b1be",
 -    "metadata": {},
 -    "outputs": [
 -     {
 -      "data": {
 -       "application/vnd.jupyter.widget-view+json": {
 -        "model_id": "e939f7b60d8f4721970453214f5c785f",
 -        "version_major": 2,
 -        "version_minor": 0
 -       },
 -       "text/plain": [
 -        "  0%|          | 0/2768191 [00:00<?, ?it/s]"
 -       ]
 -      },
 -      "metadata": {},
 -      "output_type": "display_data"
 -     }
 -    ],
 -    "source": [
 -     "# Translate both id columns into drug_id_df row labels with one dict lookup per\n",
 -     "# value instead of scanning drug_id_df for every interaction row.\n",
 -     "node_of = dict(zip(drug_id_df['DrugBank_id'], drug_id_df.index))\n",
 -     "src = ddi['drug id'].map(node_of)\n",
 -     "dst = ddi['interaction drug id'].map(node_of)\n",
 -     "\n",
 -     "# Series.map leaves NaN for ids absent from the dict; fail loudly rather than\n",
 -     "# writing NaN edges.\n",
 -     "if src.isna().any() or dst.isna().any():\n",
 -     "    raise KeyError('interaction table references a drug missing from drug_id_df')\n",
 -     "src, dst = src.to_numpy(), dst.to_numpy()\n",
 -     "\n",
 -     "# the graph is not directed: row 2k is (drug1, drug2) and row 2k+1 is its reverse\n",
 -     "forward = np.stack([src, dst], axis=1)\n",
 -     "backward = np.stack([dst, src], axis=1)\n",
 -     "edge_indexes = np.stack([forward, backward], axis=1).reshape(-1, 2)"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 19,
 -    "id": "f1296a7a",
 -    "metadata": {},
 -    "outputs": [
 -     {
 -      "data": {
 -       "text/html": [
 -        "<div>\n",
 -        "<style scoped>\n",
 -        "    .dataframe tbody tr th:only-of-type {\n",
 -        "        vertical-align: middle;\n",
 -        "    }\n",
 -        "\n",
 -        "    .dataframe tbody tr th {\n",
 -        "        vertical-align: top;\n",
 -        "    }\n",
 -        "\n",
 -        "    .dataframe thead th {\n",
 -        "        text-align: right;\n",
 -        "    }\n",
 -        "</style>\n",
 -        "<table border=\"1\" class=\"dataframe\">\n",
 -        "  <thead>\n",
 -        "    <tr style=\"text-align: right;\">\n",
 -        "      <th></th>\n",
 -        "      <th>drug1_idx</th>\n",
 -        "      <th>drug2_idx</th>\n",
 -        "    </tr>\n",
 -        "  </thead>\n",
 -        "  <tbody>\n",
 -        "    <tr>\n",
 -        "      <th>0</th>\n",
 -        "      <td>0</td>\n",
 -        "      <td>2022</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>1</th>\n",
 -        "      <td>2022</td>\n",
 -        "      <td>0</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>2</th>\n",
 -        "      <td>0</td>\n",
 -        "      <td>4417</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>3</th>\n",
 -        "      <td>4417</td>\n",
 -        "      <td>0</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>4</th>\n",
 -        "      <td>0</td>\n",
 -        "      <td>4418</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>...</th>\n",
 -        "      <td>...</td>\n",
 -        "      <td>...</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5536377</th>\n",
 -        "      <td>1358</td>\n",
 -        "      <td>4416</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5536378</th>\n",
 -        "      <td>4416</td>\n",
 -        "      <td>2891</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5536379</th>\n",
 -        "      <td>2891</td>\n",
 -        "      <td>4416</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5536380</th>\n",
 -        "      <td>4416</td>\n",
 -        "      <td>4329</td>\n",
 -        "    </tr>\n",
 -        "    <tr>\n",
 -        "      <th>5536381</th>\n",
 -        "      <td>4329</td>\n",
 -        "      <td>4416</td>\n",
 -        "    </tr>\n",
 -        "  </tbody>\n",
 -        "</table>\n",
 -        "<p>5536382 rows × 2 columns</p>\n",
 -        "</div>"
 -       ],
 -       "text/plain": [
 -        "         drug1_idx  drug2_idx\n",
 -        "0                0       2022\n",
 -        "1             2022          0\n",
 -        "2                0       4417\n",
 -        "3             4417          0\n",
 -        "4                0       4418\n",
 -        "...            ...        ...\n",
 -        "5536377       1358       4416\n",
 -        "5536378       4416       2891\n",
 -        "5536379       2891       4416\n",
 -        "5536380       4416       4329\n",
 -        "5536381       4329       4416\n",
 -        "\n",
 -        "[5536382 rows x 2 columns]"
 -       ]
 -      },
 -      "execution_count": 19,
 -      "metadata": {},
 -      "output_type": "execute_result"
 -     }
 -    ],
 -    "source": [
 -     "# Turn the collected [drug1, drug2] index pairs into a two-column edge table.\n",
 -     "edge_indexes = np.asarray(edge_indexes)\n",
 -     "drug_interaction_df = pd.DataFrame(\n",
 -     "    {\n",
 -     "        column: edge_indexes[:, position]\n",
 -     "        for position, column in enumerate(('drug1_idx', 'drug2_idx'))\n",
 -     "    }\n",
 -     ")\n",
 -     "drug_interaction_df"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": 20,
 -    "id": "daa952c2",
 -    "metadata": {},
 -    "outputs": [],
 -    "source": [
 -     "# save drug_interactions file\n",
 -     "drug_interaction_df.to_csv(\n",
 -     "    'raw/drug_interactions.tsv',\n",
 -     "    sep='\\t',\n",
 -     "    index=False,\n",
 -     ")"
 -    ]
 -   },
 -   {
 -    "cell_type": "code",
 -    "execution_count": null,
 -    "id": "b34d252b",
 -    "metadata": {},
 -    "outputs": [],
 -    "source": []
 -   }
 -  ],
 -  "metadata": {
 -   "kernelspec": {
 -    "display_name": "Python 3 (ipykernel)",
 -    "language": "python",
 -    "name": "python3"
 -   },
 -   "language_info": {
 -    "codemirror_mode": {
 -     "name": "ipython",
 -     "version": 3
 -    },
 -    "file_extension": ".py",
 -    "mimetype": "text/x-python",
 -    "name": "python",
 -    "nbconvert_exporter": "python",
 -    "pygments_lexer": "ipython3",
 -    "version": "3.9.7"
 -   }
 -  },
 -  "nbformat": 4,
 -  "nbformat_minor": 5
 - }
 
 
  |