{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [ "AdU8VMTIOWLZ", "a3zuvbqx2l68", "XjAPkfq7SF87", "R6PE5ds45TPr", "y73zFlRGIbt9", "oBgNtpFQDwku", "JGEUIrbi9kNH", "fTRgGM_8_Fwg", "jPXWBZ4R_bGs" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard" }, "cells": [ { "cell_type": "markdown", "source": [ "# Setup Environment" ], "metadata": { "id": "9sEfZoepGP8x" } }, { "cell_type": "code", "source": [ "! pip install hazm==0.10.0" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 770 }, "id": "u6n8Hc1hQSy7", "outputId": "e5448572-c76c-4336-97e0-4e931a1c3940" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: hazm==0.10.0 in /usr/local/lib/python3.11/dist-packages (0.10.0)\n", "Requirement already satisfied: fasttext-wheel<0.10.0,>=0.9.2 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (0.9.2)\n", "Requirement already satisfied: flashtext<3.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (2.7)\n", "Requirement already satisfied: gensim<5.0.0,>=4.3.1 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (4.3.3)\n", "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (3.9.1)\n", "Collecting numpy==1.24.3 (from hazm==0.10.0)\n", " Using cached numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", "Requirement already satisfied: python-crfsuite<0.10.0,>=0.9.9 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (0.9.11)\n", "Requirement already satisfied: scikit-learn<2.0.0,>=1.2.2 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (1.6.1)\n", "Requirement already satisfied: pybind11>=2.2 in /usr/local/lib/python3.11/dist-packages (from fasttext-wheel<0.10.0,>=0.9.2->hazm==0.10.0) (2.13.6)\n", "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from fasttext-wheel<0.10.0,>=0.9.2->hazm==0.10.0) (75.2.0)\n", "Requirement already satisfied: scipy<1.14.0,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from gensim<5.0.0,>=4.3.1->hazm==0.10.0) (1.13.1)\n", "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.11/dist-packages (from gensim<5.0.0,>=4.3.1->hazm==0.10.0) (7.1.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (8.1.8)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (1.4.2)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (2024.11.6)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (4.67.1)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn<2.0.0,>=1.2.2->hazm==0.10.0) (3.6.0)\n", "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open>=1.8.1->gensim<5.0.0,>=4.3.1->hazm==0.10.0) (1.17.2)\n", "Using cached numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", "Installing collected packages: numpy\n", " Attempting uninstall: numpy\n", " Found existing installation: numpy 1.26.0\n", " Uninstalling numpy-1.26.0:\n", " Successfully uninstalled numpy-1.26.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "blosc2 3.3.2 requires numpy>=1.26, but you have numpy 1.24.3 which is incompatible.\n", "thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.3 which is incompatible.\n", "treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.3 which is incompatible.\n", "pymc 5.22.0 requires numpy>=1.25.0, but you have numpy 1.24.3 which is incompatible.\n", "albumentations 2.0.6 requires numpy>=1.24.4, but you have numpy 1.24.3 which is incompatible.\n", "albucore 0.0.24 requires numpy>=1.24.4, but you have numpy 1.24.3 which is incompatible.\n", "tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 1.24.3 which is incompatible.\n", "jax 0.5.2 requires numpy>=1.25, but you have numpy 1.24.3 which is incompatible.\n", "jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.24.3 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed numpy-1.24.3\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "numpy" ] }, "id": "02f4ece44a3543ebb22ad3f3301874b3" } }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "!pip install numpy==1.26.0" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iA2Jjex-KMqx", "outputId": "521918bf-2909-4310-c2f8-5774c16a6215" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting numpy==1.26.0\n", " Using cached numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)\n", "Using cached numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n", "Installing collected packages: numpy\n", " Attempting uninstall: numpy\n", " Found existing installation: numpy 1.24.3\n", " Uninstalling numpy-1.24.3:\n", " Successfully uninstalled numpy-1.24.3\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "hazm 0.10.0 requires numpy==1.24.3, but you have numpy 1.26.0 which is incompatible.\n", "thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed numpy-1.26.0\n" ] } ] }, { "cell_type": "code", "source": [ "from IPython.display import display, HTML\n", "\n", "display(HTML(\"\"\"\n", "
\n", " | dataset | \n", "grapheme | \n", "phoneme | \n", "homograph word | \n", "pronunciation | \n", "
---|---|---|---|---|---|
0 | \n", "homograph | \n", "من قدر تو را میدانم | \n", "man qadr-e to rA mi-dAnam | \n", "قدر | \n", "qadr | \n", "
1 | \n", "homograph | \n", "از قضای الهی به قدر الهی پناه میبرم | \n", "?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram | \n", "قدر | \n", "qadar | \n", "
2 | \n", "homograph | \n", "به دست و صورتم کرم زدم | \n", "be dast-o suratam kerem zadam | \n", "کرم | \n", "kerem | \n", "