{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9f0d2ac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e0c2c306",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Notebook\\anaconda3\\lib\\site-packages\\numpy\\lib\\arraysetops.py:583: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
      "  mask |= (ar1 == a)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>drug id</th>\n",
       "      <th>drug name</th>\n",
       "      <th>interaction drug id</th>\n",
       "      <th>interaction drug name</th>\n",
       "      <th>description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>DB00001</td>\n",
       "      <td>Lepirudin</td>\n",
       "      <td>DB06605</td>\n",
       "      <td>Apixaban</td>\n",
       "      <td>Apixaban may increase the anticoagulant activi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>DB00001</td>\n",
       "      <td>Lepirudin</td>\n",
       "      <td>DB06695</td>\n",
       "      <td>Dabigatran etexilate</td>\n",
       "      <td>Dabigatran etexilate may increase the anticoag...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>DB00001</td>\n",
       "      <td>Lepirudin</td>\n",
       "      <td>DB01254</td>\n",
       "      <td>Dasatinib</td>\n",
       "      <td>The risk or severity of bleeding and hemorrhag...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>DB00001</td>\n",
       "      <td>Lepirudin</td>\n",
       "      <td>DB01609</td>\n",
       "      <td>Deferasirox</td>\n",
       "      <td>The risk or severity of gastrointestinal bleed...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>DB00001</td>\n",
       "      <td>Lepirudin</td>\n",
       "      <td>DB01586</td>\n",
       "      <td>Ursodeoxycholic acid</td>\n",
       "      <td>The risk or severity of bleeding and bruising ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2768186</th>\n",
       "      <td>DB16746</td>\n",
       "      <td>Elivaldogene autotemcel</td>\n",
       "      <td>DB01264</td>\n",
       "      <td>Darunavir</td>\n",
       "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2768187</th>\n",
       "      <td>DB16746</td>\n",
       "      <td>Elivaldogene autotemcel</td>\n",
       "      <td>DB01319</td>\n",
       "      <td>Fosamprenavir</td>\n",
       "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2768188</th>\n",
       "      <td>DB16746</td>\n",
       "      <td>Elivaldogene autotemcel</td>\n",
       "      <td>DB01601</td>\n",
       "      <td>Lopinavir</td>\n",
       "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2768189</th>\n",
       "      <td>DB16746</td>\n",
       "      <td>Elivaldogene autotemcel</td>\n",
       "      <td>DB11586</td>\n",
       "      <td>Asunaprevir</td>\n",
       "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2768190</th>\n",
       "      <td>DB16746</td>\n",
       "      <td>Elivaldogene autotemcel</td>\n",
       "      <td>DB15623</td>\n",
       "      <td>TMC-310911</td>\n",
       "      <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2768191 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         drug id                drug name interaction drug id  \\\n",
       "0        DB00001                Lepirudin             DB06605   \n",
       "1        DB00001                Lepirudin             DB06695   \n",
       "2        DB00001                Lepirudin             DB01254   \n",
       "3        DB00001                Lepirudin             DB01609   \n",
       "4        DB00001                Lepirudin             DB01586   \n",
       "...          ...                      ...                 ...   \n",
       "2768186  DB16746  Elivaldogene autotemcel             DB01264   \n",
       "2768187  DB16746  Elivaldogene autotemcel             DB01319   \n",
       "2768188  DB16746  Elivaldogene autotemcel             DB01601   \n",
       "2768189  DB16746  Elivaldogene autotemcel             DB11586   \n",
       "2768190  DB16746  Elivaldogene autotemcel             DB15623   \n",
       "\n",
       "        interaction drug name  \\\n",
       "0                    Apixaban   \n",
       "1        Dabigatran etexilate   \n",
       "2                   Dasatinib   \n",
       "3                 Deferasirox   \n",
       "4        Ursodeoxycholic acid   \n",
       "...                       ...   \n",
       "2768186             Darunavir   \n",
       "2768187         Fosamprenavir   \n",
       "2768188             Lopinavir   \n",
       "2768189           Asunaprevir   \n",
       "2768190            TMC-310911   \n",
       "\n",
       "                                               description  \n",
       "0        Apixaban may increase the anticoagulant activi...  \n",
       "1        Dabigatran etexilate may increase the anticoag...  \n",
       "2        The risk or severity of bleeding and hemorrhag...  \n",
       "3        The risk or severity of gastrointestinal bleed...  \n",
       "4        The risk or severity of bleeding and bruising ...  \n",
       "...                                                    ...  \n",
       "2768186  The therapeutic efficacy of Elivaldogene autot...  \n",
       "2768187  The therapeutic efficacy of Elivaldogene autot...  \n",
       "2768188  The therapeutic efficacy of Elivaldogene autot...  \n",
       "2768189  The therapeutic efficacy of Elivaldogene autot...  \n",
       "2768190  The therapeutic efficacy of Elivaldogene autot...  \n",
       "\n",
       "[2768191 rows x 5 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the raw DrugBank interaction table (tab-separated); column 0 becomes the row index.\n",
    "ddi = pd.read_table(\n",
    "    'Dataset/DDI/DrugBank/raw/Drugbank_drug_interactions.tsv',\n",
    "    index_col=0,\n",
    ")\n",
    "ddi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "35d57012",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['DB00001', 'DB00002', 'DB00004', ..., 'DB09047', 'DB11074',\n",
       "       'DB00878'], dtype=object)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gather every DrugBank ID that appears on either side of an interaction.\n",
    "drug1s, drug2s = ddi['drug id'], ddi['interaction drug id']\n",
    "# Stack the two ID columns end to end, then keep one copy of each ID.\n",
    "drugs = pd.unique(pd.concat([drug1s, drug2s]))\n",
    "drugs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "79575691",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4417\n",
      "4418\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5915"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct IDs seen in each interaction column, then across both columns combined.\n",
    "for id_column in (drug1s, drug2s):\n",
    "    print(len(id_column.unique()))\n",
    "len(drugs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "e82113d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DrugBank_id</th>\n",
       "      <th>node_index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>DB00001</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>DB00002</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>DB00004</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>DB00005</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>DB00006</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5910</th>\n",
       "      <td>DB12264</td>\n",
       "      <td>5910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5911</th>\n",
       "      <td>DB06614</td>\n",
       "      <td>5911</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5912</th>\n",
       "      <td>DB09047</td>\n",
       "      <td>5912</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5913</th>\n",
       "      <td>DB11074</td>\n",
       "      <td>5913</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5914</th>\n",
       "      <td>DB00878</td>\n",
       "      <td>5914</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5915 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     DrugBank_id node_index\n",
       "0        DB00001          0\n",
       "1        DB00002          1\n",
       "2        DB00004          2\n",
       "3        DB00005          3\n",
       "4        DB00006          4\n",
       "...          ...        ...\n",
       "5910     DB12264       5910\n",
       "5911     DB06614       5911\n",
       "5912     DB09047       5912\n",
       "5913     DB11074       5913\n",
       "5914     DB00878       5914\n",
       "\n",
       "[5915 rows x 2 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Give each unique DrugBank ID a consecutive integer node index (its position in `drugs`).\n",
    "# The frame is built straight from `drugs` so node_index stays an integer column;\n",
    "# packing the (id, index) pairs into one NumPy array would coerce the indices to strings.\n",
    "drug_id_df = pd.DataFrame({\n",
    "    'DrugBank_id': drugs,\n",
    "    'node_index': range(len(drugs)),\n",
    "})\n",
    "drug_id_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "466fa3f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5915, 2)"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Write the DrugBank_id / node_index table out as a tab-separated file.\n",
    "# NOTE(review): the original line had no object before .to_csv (a SyntaxError);\n",
    "# drug_id_df is assumed to be the intended frame, and the output path is kept\n",
    "# exactly as written - confirm both before running.\n",
    "# The raw string keeps the leading backslash literal without relying on an invalid escape.\n",
    "drug_id_df.to_csv(r'\\Drugbank drug interactions.tsv', sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7a7b1be",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}