{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7068baf6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " drug1 drug2\n",
      "0 DB00862 DB00966\n",
      "1 DB00575 DB00806\n",
      "2 DB01242 DB08893\n",
      "3 DB01151 DB08883\n",
      "4 DB01235 DB01275\n",
      "... ... ...\n",
      "48509 DB00542 DB01354\n",
      "48510 DB00476 DB01239\n",
      "48511 DB00621 DB01120\n",
      "48512 DB00808 DB01356\n",
      "48513 DB00677 DB06287\n",
      "\n",
      "[48514 rows x 2 columns]\n"
     ]
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Input locations, relative to the notebook's working directory.\n",
    "# Built with pathlib so they resolve on any OS; backslash-separated string\n",
    "# literals depend on invalid escape sequences and only work on Windows.\n",
    "DATA_DIR = Path('Dataset') / 'DDI'\n",
    "# NOTE: 'durgbank' (sic) is the name the file was loaded under; rename it only\n",
    "# together with the file on disk.\n",
    "SNAP_DDI_TSV = DATA_DIR / 'SNAP Stanford' / 'ChCh-Miner_durgbank-chem-chem.tsv'\n",
    "DRUGBANK_XML = DATA_DIR / 'DrugBank' / 'full_database.xml'\n",
    "\n",
    "# Tab-separated table with one drug pair per row (columns: drug1, drug2).\n",
    "drug_interactions_df = pd.read_csv(SNAP_DDI_TSV, sep='\\t')\n",
    "\n",
    "# printing data\n",
    "print(drug_interactions_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6abb956",
   "metadata": {},
   "outputs": [],
   "source": [
    "# number of distinct drugs in the dataset\n",
    "# A drug can appear in either column of a pair, so the unique IDs are counted\n",
    "# over drug1 and drug2 together; counting drug1 alone misses every drug that\n",
    "# only ever appears as the second member of a pair.\n",
    "pd.concat(\n",
    "    [drug_interactions_df['drug1'], drug_interactions_df['drug2']],\n",
    "    ignore_index=True,\n",
    ").nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6cfd6019",
   "metadata": {},
   "outputs": [],
   "source": [
    "import xml.etree.ElementTree as ET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e6c9e0b2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Parse the whole DrugBank XML export into an in-memory element tree.\n",
    "tree = ET.parse(DRUGBANK_XML)\n",
    "\n",
    "# get the parent tag\n",
    "root = tree.getroot()\n",
    "\n",
    "# print the root (parent) tag along with its memory location\n",
    "print(root)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "12ada46d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "xml.etree.ElementTree.Element"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(root)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "06f2c07d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabular view of the same XML file (this reads the file a second time).\n",
    "# With the default xpath ('./*') pd.read_xml yields one row per child of the\n",
    "# root element.\n",
    "ddi = pd.read_xml(DRUGBANK_XML)"
   ]
  },
  { "cell_type": "code", "execution_count": 4, "id": "582cc9b6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typecreatedupdated{http://www.drugbank.ca}drugbank-id{http://www.drugbank.ca}name{http://www.drugbank.ca}description{http://www.drugbank.ca}cas-number{http://www.drugbank.ca}unii{http://www.drugbank.ca}state{http://www.drugbank.ca}groups...{http://www.drugbank.ca}reactions{http://www.drugbank.ca}snp-effects{http://www.drugbank.ca}snp-adverse-drug-reactions{http://www.drugbank.ca}targets{http://www.drugbank.ca}enzymes{http://www.drugbank.ca}carriers{http://www.drugbank.ca}transporters{http://www.drugbank.ca}average-mass{http://www.drugbank.ca}monoisotopic-mass{http://www.drugbank.ca}calculated-properties
0biotech2005-06-132021-10-03BIOD00024LepirudinLepirudin is identical to natural hirudin exce...138068-37-8Y43GF64R34liquidNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1biotech2005-06-132022-01-02BIOD00071CetuximabCetuximab is a recombinant chimeric human/mous...205923-56-4PQX0D8J21JliquidNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2biotech2005-06-132022-01-02BIOD00001Dornase alfaDornase alfa is a biosynthetic form of human d...143831-71-4953A26OA1YliquidNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3biotech2005-06-132021-10-03BIOD00084Denileukin diftitoxA recombinant DNA-derived cytotoxic protein co...173146-27-525E79B5CTMliquidNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4biotech2005-06-132022-01-02BIOD00052EtanerceptDimeric fusion protein consisting of the extra...185243-69-0OP401G7OJCliquidNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
14589small molecule2021-12-022021-12-03DB16742RP-67580None135911-02-349U9M41BGYNoneNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14590small molecule2021-12-022021-12-03DB16743Nolpitantium chlorideNone153050-21-622O6XI63E0NoneNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14591small molecule2021-12-022021-12-03DB16744CP-96345None132746-60-2W22ILA2I52NoneNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14592small molecule2021-12-022021-12-03DB16745PXT 3003PXT 3003 is in phase 3 clinical trials for the...1467047-91-1NoneNoneNaN...NaNNaNNaNNaNNaNNaNNaN353.466353.210327NaN
14593biotech2021-12-202021-12-24DB16746Elivaldogene autotemcelAdrenoleukodystrophy (ALD) is an X-linked gene...NoneKUM75TD6SGliquidNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

14594 rows × 58 columns

\n", "
" ], "text/plain": [ " type created updated \\\n", "0 biotech 2005-06-13 2021-10-03 \n", "1 biotech 2005-06-13 2022-01-02 \n", "2 biotech 2005-06-13 2022-01-02 \n", "3 biotech 2005-06-13 2021-10-03 \n", "4 biotech 2005-06-13 2022-01-02 \n", "... ... ... ... \n", "14589 small molecule 2021-12-02 2021-12-03 \n", "14590 small molecule 2021-12-02 2021-12-03 \n", "14591 small molecule 2021-12-02 2021-12-03 \n", "14592 small molecule 2021-12-02 2021-12-03 \n", "14593 biotech 2021-12-20 2021-12-24 \n", "\n", " {http://www.drugbank.ca}drugbank-id {http://www.drugbank.ca}name \\\n", "0 BIOD00024 Lepirudin \n", "1 BIOD00071 Cetuximab \n", "2 BIOD00001 Dornase alfa \n", "3 BIOD00084 Denileukin diftitox \n", "4 BIOD00052 Etanercept \n", "... ... ... \n", "14589 DB16742 RP-67580 \n", "14590 DB16743 Nolpitantium chloride \n", "14591 DB16744 CP-96345 \n", "14592 DB16745 PXT 3003 \n", "14593 DB16746 Elivaldogene autotemcel \n", "\n", " {http://www.drugbank.ca}description \\\n", "0 Lepirudin is identical to natural hirudin exce... \n", "1 Cetuximab is a recombinant chimeric human/mous... \n", "2 Dornase alfa is a biosynthetic form of human d... \n", "3 A recombinant DNA-derived cytotoxic protein co... \n", "4 Dimeric fusion protein consisting of the extra... \n", "... ... \n", "14589 None \n", "14590 None \n", "14591 None \n", "14592 PXT 3003 is in phase 3 clinical trials for the... \n", "14593 Adrenoleukodystrophy (ALD) is an X-linked gene... \n", "\n", " {http://www.drugbank.ca}cas-number {http://www.drugbank.ca}unii \\\n", "0 138068-37-8 Y43GF64R34 \n", "1 205923-56-4 PQX0D8J21J \n", "2 143831-71-4 953A26OA1Y \n", "3 173146-27-5 25E79B5CTM \n", "4 185243-69-0 OP401G7OJC \n", "... ... ... \n", "14589 135911-02-3 49U9M41BGY \n", "14590 153050-21-6 22O6XI63E0 \n", "14591 132746-60-2 W22ILA2I52 \n", "14592 1467047-91-1 None \n", "14593 None KUM75TD6SG \n", "\n", " {http://www.drugbank.ca}state {http://www.drugbank.ca}groups ... \\\n", "0 liquid NaN ... \n", "1 liquid NaN ... 
\n", "2 liquid NaN ... \n", "3 liquid NaN ... \n", "4 liquid NaN ... \n", "... ... ... ... \n", "14589 None NaN ... \n", "14590 None NaN ... \n", "14591 None NaN ... \n", "14592 None NaN ... \n", "14593 liquid NaN ... \n", "\n", " {http://www.drugbank.ca}reactions {http://www.drugbank.ca}snp-effects \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... \n", "14589 NaN NaN \n", "14590 NaN NaN \n", "14591 NaN NaN \n", "14592 NaN NaN \n", "14593 NaN NaN \n", "\n", " {http://www.drugbank.ca}snp-adverse-drug-reactions \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "14589 NaN \n", "14590 NaN \n", "14591 NaN \n", "14592 NaN \n", "14593 NaN \n", "\n", " {http://www.drugbank.ca}targets {http://www.drugbank.ca}enzymes \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... \n", "14589 NaN NaN \n", "14590 NaN NaN \n", "14591 NaN NaN \n", "14592 NaN NaN \n", "14593 NaN NaN \n", "\n", " {http://www.drugbank.ca}carriers {http://www.drugbank.ca}transporters \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... \n", "14589 NaN NaN \n", "14590 NaN NaN \n", "14591 NaN NaN \n", "14592 NaN NaN \n", "14593 NaN NaN \n", "\n", " {http://www.drugbank.ca}average-mass \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "14589 NaN \n", "14590 NaN \n", "14591 NaN \n", "14592 353.466 \n", "14593 NaN \n", "\n", " {http://www.drugbank.ca}monoisotopic-mass \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "14589 NaN \n", "14590 NaN \n", "14591 NaN \n", "14592 353.210327 \n", "14593 NaN \n", "\n", " {http://www.drugbank.ca}calculated-properties \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... 
\n", "14589 NaN \n", "14590 NaN \n", "14591 NaN \n", "14592 NaN \n", "14593 NaN \n", "\n", "[14594 rows x 58 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ddi" ] }, { "cell_type": "code", "execution_count": 12, "id": "979b2c1c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typecreatedupdated{http://www.drugbank.ca}drugbank-id{http://www.drugbank.ca}name{http://www.drugbank.ca}description{http://www.drugbank.ca}cas-number{http://www.drugbank.ca}unii{http://www.drugbank.ca}state{http://www.drugbank.ca}groups...{http://www.drugbank.ca}reactions{http://www.drugbank.ca}snp-effects{http://www.drugbank.ca}snp-adverse-drug-reactions{http://www.drugbank.ca}targets{http://www.drugbank.ca}enzymes{http://www.drugbank.ca}carriers{http://www.drugbank.ca}transporters{http://www.drugbank.ca}average-mass{http://www.drugbank.ca}monoisotopic-mass{http://www.drugbank.ca}calculated-properties
\n", "

0 rows × 58 columns

\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [type, created, updated, {http://www.drugbank.ca}drugbank-id, {http://www.drugbank.ca}name, {http://www.drugbank.ca}description, {http://www.drugbank.ca}cas-number, {http://www.drugbank.ca}unii, {http://www.drugbank.ca}state, {http://www.drugbank.ca}groups, {http://www.drugbank.ca}general-references, {http://www.drugbank.ca}synthesis-reference, {http://www.drugbank.ca}indication, {http://www.drugbank.ca}pharmacodynamics, {http://www.drugbank.ca}mechanism-of-action, {http://www.drugbank.ca}toxicity, {http://www.drugbank.ca}metabolism, {http://www.drugbank.ca}absorption, {http://www.drugbank.ca}half-life, {http://www.drugbank.ca}protein-binding, {http://www.drugbank.ca}route-of-elimination, {http://www.drugbank.ca}volume-of-distribution, {http://www.drugbank.ca}clearance, {http://www.drugbank.ca}classification, {http://www.drugbank.ca}salts, {http://www.drugbank.ca}synonyms, {http://www.drugbank.ca}products, {http://www.drugbank.ca}international-brands, {http://www.drugbank.ca}mixtures, {http://www.drugbank.ca}packagers, {http://www.drugbank.ca}manufacturers, {http://www.drugbank.ca}prices, {http://www.drugbank.ca}categories, {http://www.drugbank.ca}affected-organisms, {http://www.drugbank.ca}dosages, {http://www.drugbank.ca}atc-codes, {http://www.drugbank.ca}ahfs-codes, {http://www.drugbank.ca}pdb-entries, {http://www.drugbank.ca}fda-label, {http://www.drugbank.ca}msds, {http://www.drugbank.ca}patents, {http://www.drugbank.ca}food-interactions, {http://www.drugbank.ca}drug-interactions, {http://www.drugbank.ca}sequences, {http://www.drugbank.ca}experimental-properties, {http://www.drugbank.ca}external-identifiers, {http://www.drugbank.ca}external-links, {http://www.drugbank.ca}pathways, {http://www.drugbank.ca}reactions, {http://www.drugbank.ca}snp-effects, {http://www.drugbank.ca}snp-adverse-drug-reactions, {http://www.drugbank.ca}targets, {http://www.drugbank.ca}enzymes, {http://www.drugbank.ca}carriers, 
{http://www.drugbank.ca}transporters, {http://www.drugbank.ca}average-mass, {http://www.drugbank.ca}monoisotopic-mass, {http://www.drugbank.ca}calculated-properties]\n", "Index: []\n", "\n", "[0 rows x 58 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ],
   "source": [
    "# Select the rows whose <reactions> column holds a value; the stored result is empty (0 rows).\n",
    "# NOTE(review): pd.read_xml is documented as aimed at shallow XML, so nested elements such as\n",
    "# <reactions> presumably come back as NaN - confirm, and read them from the ElementTree instead.\n",
    "ddi.loc[ddi['{http://www.drugbank.ca}reactions'].notnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0692fe4e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0612d0e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}