You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

DDI-Network-checkpoint.ipynb 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 12,
  6. "id": "9f0d2ac5",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": [
  10. "import pandas as pd\n",
  11. "import numpy as np\n",
  12. "from tqdm.notebook import tqdm"
  13. ]
  14. },
  15. {
  16. "cell_type": "code",
  17. "execution_count": 2,
  18. "id": "e0c2c306",
  19. "metadata": {},
  20. "outputs": [
  21. {
  22. "name": "stderr",
  23. "output_type": "stream",
  24. "text": [
  25. "C:\\Users\\Notebook\\anaconda3\\lib\\site-packages\\numpy\\lib\\arraysetops.py:583: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
  26. " mask |= (ar1 == a)\n"
  27. ]
  28. },
  29. {
  30. "data": {
  31. "text/html": [
  32. "<div>\n",
  33. "<style scoped>\n",
  34. " .dataframe tbody tr th:only-of-type {\n",
  35. " vertical-align: middle;\n",
  36. " }\n",
  37. "\n",
  38. " .dataframe tbody tr th {\n",
  39. " vertical-align: top;\n",
  40. " }\n",
  41. "\n",
  42. " .dataframe thead th {\n",
  43. " text-align: right;\n",
  44. " }\n",
  45. "</style>\n",
  46. "<table border=\"1\" class=\"dataframe\">\n",
  47. " <thead>\n",
  48. " <tr style=\"text-align: right;\">\n",
  49. " <th></th>\n",
  50. " <th>drug id</th>\n",
  51. " <th>drug name</th>\n",
  52. " <th>interaction drug id</th>\n",
  53. " <th>interaction drug name</th>\n",
  54. " <th>description</th>\n",
  55. " </tr>\n",
  56. " </thead>\n",
  57. " <tbody>\n",
  58. " <tr>\n",
  59. " <th>0</th>\n",
  60. " <td>DB00001</td>\n",
  61. " <td>Lepirudin</td>\n",
  62. " <td>DB06605</td>\n",
  63. " <td>Apixaban</td>\n",
  64. " <td>Apixaban may increase the anticoagulant activi...</td>\n",
  65. " </tr>\n",
  66. " <tr>\n",
  67. " <th>1</th>\n",
  68. " <td>DB00001</td>\n",
  69. " <td>Lepirudin</td>\n",
  70. " <td>DB06695</td>\n",
  71. " <td>Dabigatran etexilate</td>\n",
  72. " <td>Dabigatran etexilate may increase the anticoag...</td>\n",
  73. " </tr>\n",
  74. " <tr>\n",
  75. " <th>2</th>\n",
  76. " <td>DB00001</td>\n",
  77. " <td>Lepirudin</td>\n",
  78. " <td>DB01254</td>\n",
  79. " <td>Dasatinib</td>\n",
  80. " <td>The risk or severity of bleeding and hemorrhag...</td>\n",
  81. " </tr>\n",
  82. " <tr>\n",
  83. " <th>3</th>\n",
  84. " <td>DB00001</td>\n",
  85. " <td>Lepirudin</td>\n",
  86. " <td>DB01609</td>\n",
  87. " <td>Deferasirox</td>\n",
  88. " <td>The risk or severity of gastrointestinal bleed...</td>\n",
  89. " </tr>\n",
  90. " <tr>\n",
  91. " <th>4</th>\n",
  92. " <td>DB00001</td>\n",
  93. " <td>Lepirudin</td>\n",
  94. " <td>DB01586</td>\n",
  95. " <td>Ursodeoxycholic acid</td>\n",
  96. " <td>The risk or severity of bleeding and bruising ...</td>\n",
  97. " </tr>\n",
  98. " <tr>\n",
  99. " <th>...</th>\n",
  100. " <td>...</td>\n",
  101. " <td>...</td>\n",
  102. " <td>...</td>\n",
  103. " <td>...</td>\n",
  104. " <td>...</td>\n",
  105. " </tr>\n",
  106. " <tr>\n",
  107. " <th>2768186</th>\n",
  108. " <td>DB16746</td>\n",
  109. " <td>Elivaldogene autotemcel</td>\n",
  110. " <td>DB01264</td>\n",
  111. " <td>Darunavir</td>\n",
  112. " <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
  113. " </tr>\n",
  114. " <tr>\n",
  115. " <th>2768187</th>\n",
  116. " <td>DB16746</td>\n",
  117. " <td>Elivaldogene autotemcel</td>\n",
  118. " <td>DB01319</td>\n",
  119. " <td>Fosamprenavir</td>\n",
  120. " <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
  121. " </tr>\n",
  122. " <tr>\n",
  123. " <th>2768188</th>\n",
  124. " <td>DB16746</td>\n",
  125. " <td>Elivaldogene autotemcel</td>\n",
  126. " <td>DB01601</td>\n",
  127. " <td>Lopinavir</td>\n",
  128. " <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
  129. " </tr>\n",
  130. " <tr>\n",
  131. " <th>2768189</th>\n",
  132. " <td>DB16746</td>\n",
  133. " <td>Elivaldogene autotemcel</td>\n",
  134. " <td>DB11586</td>\n",
  135. " <td>Asunaprevir</td>\n",
  136. " <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
  137. " </tr>\n",
  138. " <tr>\n",
  139. " <th>2768190</th>\n",
  140. " <td>DB16746</td>\n",
  141. " <td>Elivaldogene autotemcel</td>\n",
  142. " <td>DB15623</td>\n",
  143. " <td>TMC-310911</td>\n",
  144. " <td>The therapeutic efficacy of Elivaldogene autot...</td>\n",
  145. " </tr>\n",
  146. " </tbody>\n",
  147. "</table>\n",
  148. "<p>2768191 rows × 5 columns</p>\n",
  149. "</div>"
  150. ],
  151. "text/plain": [
  152. " drug id drug name interaction drug id \\\n",
  153. "0 DB00001 Lepirudin DB06605 \n",
  154. "1 DB00001 Lepirudin DB06695 \n",
  155. "2 DB00001 Lepirudin DB01254 \n",
  156. "3 DB00001 Lepirudin DB01609 \n",
  157. "4 DB00001 Lepirudin DB01586 \n",
  158. "... ... ... ... \n",
  159. "2768186 DB16746 Elivaldogene autotemcel DB01264 \n",
  160. "2768187 DB16746 Elivaldogene autotemcel DB01319 \n",
  161. "2768188 DB16746 Elivaldogene autotemcel DB01601 \n",
  162. "2768189 DB16746 Elivaldogene autotemcel DB11586 \n",
  163. "2768190 DB16746 Elivaldogene autotemcel DB15623 \n",
  164. "\n",
  165. " interaction drug name \\\n",
  166. "0 Apixaban \n",
  167. "1 Dabigatran etexilate \n",
  168. "2 Dasatinib \n",
  169. "3 Deferasirox \n",
  170. "4 Ursodeoxycholic acid \n",
  171. "... ... \n",
  172. "2768186 Darunavir \n",
  173. "2768187 Fosamprenavir \n",
  174. "2768188 Lopinavir \n",
  175. "2768189 Asunaprevir \n",
  176. "2768190 TMC-310911 \n",
  177. "\n",
  178. " description \n",
  179. "0 Apixaban may increase the anticoagulant activi... \n",
  180. "1 Dabigatran etexilate may increase the anticoag... \n",
  181. "2 The risk or severity of bleeding and hemorrhag... \n",
  182. "3 The risk or severity of gastrointestinal bleed... \n",
  183. "4 The risk or severity of bleeding and bruising ... \n",
  184. "... ... \n",
  185. "2768186 The therapeutic efficacy of Elivaldogene autot... \n",
  186. "2768187 The therapeutic efficacy of Elivaldogene autot... \n",
  187. "2768188 The therapeutic efficacy of Elivaldogene autot... \n",
  188. "2768189 The therapeutic efficacy of Elivaldogene autot... \n",
  189. "2768190 The therapeutic efficacy of Elivaldogene autot... \n",
  190. "\n",
  191. "[2768191 rows x 5 columns]"
  192. ]
  193. },
  194. "execution_count": 2,
  195. "metadata": {},
  196. "output_type": "execute_result"
  197. }
  198. ],
  199. "source": [
  200. "ddi = pd.read_csv('raw/Drugbank_drug_interactions.tsv', sep='\\t', index_col=0)\n",
  201. "ddi"
  202. ]
  203. },
  204. {
  205. "cell_type": "code",
  206. "execution_count": 3,
  207. "id": "35d57012",
  208. "metadata": {},
  209. "outputs": [
  210. {
  211. "data": {
  212. "text/plain": [
  213. "array(['DB00001', 'DB00002', 'DB00004', ..., 'DB09047', 'DB11074',\n",
  214. " 'DB00878'], dtype=object)"
  215. ]
  216. },
  217. "execution_count": 3,
  218. "metadata": {},
  219. "output_type": "execute_result"
  220. }
  221. ],
  222. "source": [
  223. "drug1s = ddi['drug id']\n",
  224. "drug2s = ddi['interaction drug id']\n",
  225. "drugs = pd.concat([drug1s, drug2s], axis=0).unique()\n",
  226. "drugs"
  227. ]
  228. },
  229. {
  230. "cell_type": "code",
  231. "execution_count": 4,
  232. "id": "79575691",
  233. "metadata": {},
  234. "outputs": [
  235. {
  236. "name": "stdout",
  237. "output_type": "stream",
  238. "text": [
  239. "4417\n",
  240. "4418\n"
  241. ]
  242. },
  243. {
  244. "data": {
  245. "text/plain": [
  246. "5915"
  247. ]
  248. },
  249. "execution_count": 4,
  250. "metadata": {},
  251. "output_type": "execute_result"
  252. }
  253. ],
  254. "source": [
  255. "print(len(drug1s.unique()))\n",
  256. "print(len(drug2s.unique()))\n",
  257. "len(drugs)"
  258. ]
  259. },
  260. {
  261. "cell_type": "code",
  262. "execution_count": 10,
  263. "id": "e82113d7",
  264. "metadata": {},
  265. "outputs": [
  266. {
  267. "name": "stderr",
  268. "output_type": "stream",
  269. "text": [
  270. "5915it [00:00, 2353154.53it/s]\n"
  271. ]
  272. },
  273. {
  274. "data": {
  275. "text/html": [
  276. "<div>\n",
  277. "<style scoped>\n",
  278. " .dataframe tbody tr th:only-of-type {\n",
  279. " vertical-align: middle;\n",
  280. " }\n",
  281. "\n",
  282. " .dataframe tbody tr th {\n",
  283. " vertical-align: top;\n",
  284. " }\n",
  285. "\n",
  286. " .dataframe thead th {\n",
  287. " text-align: right;\n",
  288. " }\n",
  289. "</style>\n",
  290. "<table border=\"1\" class=\"dataframe\">\n",
  291. " <thead>\n",
  292. " <tr style=\"text-align: right;\">\n",
  293. " <th></th>\n",
  294. " <th>DrugBank_id</th>\n",
  295. " <th>node_index</th>\n",
  296. " </tr>\n",
  297. " </thead>\n",
  298. " <tbody>\n",
  299. " <tr>\n",
  300. " <th>0</th>\n",
  301. " <td>DB00001</td>\n",
  302. " <td>0</td>\n",
  303. " </tr>\n",
  304. " <tr>\n",
  305. " <th>1</th>\n",
  306. " <td>DB00002</td>\n",
  307. " <td>1</td>\n",
  308. " </tr>\n",
  309. " <tr>\n",
  310. " <th>2</th>\n",
  311. " <td>DB00004</td>\n",
  312. " <td>2</td>\n",
  313. " </tr>\n",
  314. " <tr>\n",
  315. " <th>3</th>\n",
  316. " <td>DB00005</td>\n",
  317. " <td>3</td>\n",
  318. " </tr>\n",
  319. " <tr>\n",
  320. " <th>4</th>\n",
  321. " <td>DB00006</td>\n",
  322. " <td>4</td>\n",
  323. " </tr>\n",
  324. " <tr>\n",
  325. " <th>...</th>\n",
  326. " <td>...</td>\n",
  327. " <td>...</td>\n",
  328. " </tr>\n",
  329. " <tr>\n",
  330. " <th>5910</th>\n",
  331. " <td>DB12264</td>\n",
  332. " <td>5910</td>\n",
  333. " </tr>\n",
  334. " <tr>\n",
  335. " <th>5911</th>\n",
  336. " <td>DB06614</td>\n",
  337. " <td>5911</td>\n",
  338. " </tr>\n",
  339. " <tr>\n",
  340. " <th>5912</th>\n",
  341. " <td>DB09047</td>\n",
  342. " <td>5912</td>\n",
  343. " </tr>\n",
  344. " <tr>\n",
  345. " <th>5913</th>\n",
  346. " <td>DB11074</td>\n",
  347. " <td>5913</td>\n",
  348. " </tr>\n",
  349. " <tr>\n",
  350. " <th>5914</th>\n",
  351. " <td>DB00878</td>\n",
  352. " <td>5914</td>\n",
  353. " </tr>\n",
  354. " </tbody>\n",
  355. "</table>\n",
  356. "<p>5915 rows × 2 columns</p>\n",
  357. "</div>"
  358. ],
  359. "text/plain": [
  360. " DrugBank_id node_index\n",
  361. "0 DB00001 0\n",
  362. "1 DB00002 1\n",
  363. "2 DB00004 2\n",
  364. "3 DB00005 3\n",
  365. "4 DB00006 4\n",
  366. "... ... ...\n",
  367. "5910 DB12264 5910\n",
  368. "5911 DB06614 5911\n",
  369. "5912 DB09047 5912\n",
  370. "5913 DB11074 5913\n",
  371. "5914 DB00878 5914\n",
  372. "\n",
  373. "[5915 rows x 2 columns]"
  374. ]
  375. },
  376. "execution_count": 10,
  377. "metadata": {},
  378. "output_type": "execute_result"
  379. }
  380. ],
  381. "source": [
  382. "# assign node index to drugs\n",
  383. "data = []\n",
  384. "for idx, drug in enumerate(drugs):\n",
  385. " data.append([drug,idx])\n",
  386. "\n",
  387. "data = np.asarray(data)\n",
  388. "drug_id_df = pd.DataFrame({'DrugBank_id': data[:,0], 'node_index': data[:,1]})\n",
  389. "drug_id_df"
  390. ]
  391. },
  392. {
  393. "cell_type": "code",
  394. "execution_count": 6,
  395. "id": "466fa3f4",
  396. "metadata": {},
  397. "outputs": [],
  398. "source": [
  399. "drug_id_df.to_csv('raw/drug2id.tsv',sep='\\t',index=False) # save drug2id file"
  400. ]
  401. },
  402. {
  403. "cell_type": "code",
  404. "execution_count": 16,
  405. "id": "b7a7b1be",
  406. "metadata": {},
  407. "outputs": [
  408. {
  409. "data": {
  410. "application/vnd.jupyter.widget-view+json": {
  411. "model_id": "e939f7b60d8f4721970453214f5c785f",
  412. "version_major": 2,
  413. "version_minor": 0
  414. },
  415. "text/plain": [
  416. " 0%| | 0/2768191 [00:00<?, ?it/s]"
  417. ]
  418. },
  419. "metadata": {},
  420. "output_type": "display_data"
  421. }
  422. ],
  423. "source": [
  424. "edge_indexes = []\n",
  425. "drug1_prev = drug2_prev = ''\n",
  426. "drug1_idx = drug2_idx = 0\n",
  427. "for index, row in tqdm(ddi.iterrows(), total = len(ddi)):\n",
  428. " drug1 = row['drug id']\n",
  429. " drug2 = row['interaction drug id']\n",
  430. " if drug1 != drug1_prev:\n",
  431. " drug1_idx = drug_id_df.index[drug_id_df['DrugBank_id'] == drug1][0]\n",
  432. "\n",
  433. " if drug2 != drug2_prev:\n",
  434. " drug2_idx = drug_id_df.index[drug_id_df['DrugBank_id'] == drug2][0]\n",
  435. " \n",
  436. " edge_indexes.append([drug1_idx,drug2_idx])\n",
  437. " edge_indexes.append([drug2_idx,drug1_idx]) # the graph is not directed\n",
  438. " \n",
  439. " drug1_prev = drug1\n",
  440. " drug2_prev = drug2"
  441. ]
  442. },
  443. {
  444. "cell_type": "code",
  445. "execution_count": 19,
  446. "id": "86552536",
  447. "metadata": {},
  448. "outputs": [
  449. {
  450. "data": {
  451. "text/html": [
  452. "<div>\n",
  453. "<style scoped>\n",
  454. " .dataframe tbody tr th:only-of-type {\n",
  455. " vertical-align: middle;\n",
  456. " }\n",
  457. "\n",
  458. " .dataframe tbody tr th {\n",
  459. " vertical-align: top;\n",
  460. " }\n",
  461. "\n",
  462. " .dataframe thead th {\n",
  463. " text-align: right;\n",
  464. " }\n",
  465. "</style>\n",
  466. "<table border=\"1\" class=\"dataframe\">\n",
  467. " <thead>\n",
  468. " <tr style=\"text-align: right;\">\n",
  469. " <th></th>\n",
  470. " <th>drug1_idx</th>\n",
  471. " <th>drug2_idx</th>\n",
  472. " </tr>\n",
  473. " </thead>\n",
  474. " <tbody>\n",
  475. " <tr>\n",
  476. " <th>0</th>\n",
  477. " <td>0</td>\n",
  478. " <td>2022</td>\n",
  479. " </tr>\n",
  480. " <tr>\n",
  481. " <th>1</th>\n",
  482. " <td>2022</td>\n",
  483. " <td>0</td>\n",
  484. " </tr>\n",
  485. " <tr>\n",
  486. " <th>2</th>\n",
  487. " <td>0</td>\n",
  488. " <td>4417</td>\n",
  489. " </tr>\n",
  490. " <tr>\n",
  491. " <th>3</th>\n",
  492. " <td>4417</td>\n",
  493. " <td>0</td>\n",
  494. " </tr>\n",
  495. " <tr>\n",
  496. " <th>4</th>\n",
  497. " <td>0</td>\n",
  498. " <td>4418</td>\n",
  499. " </tr>\n",
  500. " <tr>\n",
  501. " <th>...</th>\n",
  502. " <td>...</td>\n",
  503. " <td>...</td>\n",
  504. " </tr>\n",
  505. " <tr>\n",
  506. " <th>5536377</th>\n",
  507. " <td>1358</td>\n",
  508. " <td>4416</td>\n",
  509. " </tr>\n",
  510. " <tr>\n",
  511. " <th>5536378</th>\n",
  512. " <td>4416</td>\n",
  513. " <td>2891</td>\n",
  514. " </tr>\n",
  515. " <tr>\n",
  516. " <th>5536379</th>\n",
  517. " <td>2891</td>\n",
  518. " <td>4416</td>\n",
  519. " </tr>\n",
  520. " <tr>\n",
  521. " <th>5536380</th>\n",
  522. " <td>4416</td>\n",
  523. " <td>4329</td>\n",
  524. " </tr>\n",
  525. " <tr>\n",
  526. " <th>5536381</th>\n",
  527. " <td>4329</td>\n",
  528. " <td>4416</td>\n",
  529. " </tr>\n",
  530. " </tbody>\n",
  531. "</table>\n",
  532. "<p>5536382 rows × 2 columns</p>\n",
  533. "</div>"
  534. ],
  535. "text/plain": [
  536. " drug1_idx drug2_idx\n",
  537. "0 0 2022\n",
  538. "1 2022 0\n",
  539. "2 0 4417\n",
  540. "3 4417 0\n",
  541. "4 0 4418\n",
  542. "... ... ...\n",
  543. "5536377 1358 4416\n",
  544. "5536378 4416 2891\n",
  545. "5536379 2891 4416\n",
  546. "5536380 4416 4329\n",
  547. "5536381 4329 4416\n",
  548. "\n",
  549. "[5536382 rows x 2 columns]"
  550. ]
  551. },
  552. "execution_count": 19,
  553. "metadata": {},
  554. "output_type": "execute_result"
  555. }
  556. ],
  557. "source": [
  558. "edge_indexes = np.asarray(edge_indexes)\n",
  559. "drug_interaction_df = pd.DataFrame({'drug1_idx': edge_indexes[:,0], 'drug2_idx': edge_indexes[:,1]})\n",
  560. "drug_interaction_df"
  561. ]
  562. },
  563. {
  564. "cell_type": "code",
  565. "execution_count": 20,
  566. "id": "938dbeb5",
  567. "metadata": {},
  568. "outputs": [],
  569. "source": [
  570. "drug_interaction_df.to_csv('raw/drug_interactions.tsv',sep='\\t',index=False) # save drug_interactions file"
  571. ]
  572. },
  573. {
  574. "cell_type": "code",
  575. "execution_count": null,
  576. "id": "2c5c70ec",
  577. "metadata": {},
  578. "outputs": [],
  579. "source": []
  580. }
  581. ],
  582. "metadata": {
  583. "kernelspec": {
  584. "display_name": "Python 3 (ipykernel)",
  585. "language": "python",
  586. "name": "python3"
  587. },
  588. "language_info": {
  589. "codemirror_mode": {
  590. "name": "ipython",
  591. "version": 3
  592. },
  593. "file_extension": ".py",
  594. "mimetype": "text/x-python",
  595. "name": "python",
  596. "nbconvert_exporter": "python",
  597. "pygments_lexer": "ipython3",
  598. "version": "3.9.7"
  599. }
  600. },
  601. "nbformat": 4,
  602. "nbformat_minor": 5
  603. }