{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [ "ABgLYF9R8viP", "EOZGZa2lMfPe", "AdU8VMTIOWLZ", "XhbCA2tkR45b", "XjAPkfq7SF87", "f4NqCjr1FxVg", "oBgNtpFQDwku", "JGEUIrbi9kNH" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "! pip install hazm==0.10.0" ], "metadata": { "id": "KoaoU_iCNhGt", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "09c602ac-7664-4e3f-8bcb-32e7337e24ee" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting hazm==0.10.0\n", " Downloading hazm-0.10.0-py3-none-any.whl.metadata (11 kB)\n", "Collecting fasttext-wheel<0.10.0,>=0.9.2 (from hazm==0.10.0)\n", " Downloading fasttext_wheel-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n", "Collecting flashtext<3.0,>=2.7 (from hazm==0.10.0)\n", " Downloading flashtext-2.7.tar.gz (14 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting gensim<5.0.0,>=4.3.1 (from hazm==0.10.0)\n", " Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)\n", "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (3.9.1)\n", "Collecting numpy==1.24.3 (from hazm==0.10.0)\n", " Downloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", "Collecting python-crfsuite<0.10.0,>=0.9.9 (from hazm==0.10.0)\n", " Downloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)\n", "Requirement already satisfied: scikit-learn<2.0.0,>=1.2.2 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (1.6.1)\n", "Collecting pybind11>=2.2 (from fasttext-wheel<0.10.0,>=0.9.2->hazm==0.10.0)\n", " Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)\n", "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from fasttext-wheel<0.10.0,>=0.9.2->hazm==0.10.0) (75.2.0)\n", "Collecting scipy<1.14.0,>=1.7.0 (from gensim<5.0.0,>=4.3.1->hazm==0.10.0)\n", " Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.6/60.6 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.11/dist-packages (from gensim<5.0.0,>=4.3.1->hazm==0.10.0) (7.1.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (8.1.8)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (1.4.2)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (2024.11.6)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (4.67.1)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn<2.0.0,>=1.2.2->hazm==0.10.0) (3.6.0)\n", "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open>=1.8.1->gensim<5.0.0,>=4.3.1->hazm==0.10.0) 
(1.17.2)\n", "Downloading hazm-0.10.0-py3-none-any.whl (892 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m892.6/892.6 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fasttext_wheel-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m28.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.7/26.7 MB\u001b[0m \u001b[31m42.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pybind11-2.13.6-py3-none-any.whl (243 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.3/243.3 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.6/38.6 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hBuilding wheels for collected packages: flashtext\n", " Building wheel for flashtext (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for flashtext: filename=flashtext-2.7-py2.py3-none-any.whl size=9300 sha256=181e31aa2933b81d1d4b23e453d93e170ac703c16bd7cb21b77b97ed66a93296\n", " Stored in directory: /root/.cache/pip/wheels/49/20/47/f03dfa8a7239c54cbc44ff7389eefbf888d2c1873edaaec888\n", "Successfully built flashtext\n", "Installing collected packages: flashtext, python-crfsuite, pybind11, numpy, scipy, fasttext-wheel, gensim, hazm\n", " Attempting uninstall: numpy\n", " Found existing installation: numpy 2.0.2\n", " Uninstalling numpy-2.0.2:\n", " Successfully uninstalled numpy-2.0.2\n", " Attempting uninstall: scipy\n", " Found existing installation: scipy 1.15.2\n", " Uninstalling scipy-1.15.2:\n", " Successfully uninstalled scipy-1.15.2\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "blosc2 3.3.2 requires numpy>=1.26, but you have numpy 1.24.3 which is incompatible.\n", "thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.3 which is incompatible.\n", "treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.3 which is incompatible.\n", "pymc 5.22.0 requires numpy>=1.25.0, but you have numpy 1.24.3 which is incompatible.\n", "albumentations 2.0.6 requires numpy>=1.24.4, but you have numpy 1.24.3 which is incompatible.\n", "albucore 0.0.24 requires numpy>=1.24.4, but you have numpy 1.24.3 which is incompatible.\n", "tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 1.24.3 which is incompatible.\n", "jax 0.5.2 requires numpy>=1.25, but you have numpy 1.24.3 which is incompatible.\n", "jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.24.3 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed fasttext-wheel-0.9.2 flashtext-2.7 gensim-4.3.3 hazm-0.10.0 numpy-1.24.3 pybind11-2.13.6 python-crfsuite-0.9.11 scipy-1.13.1\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "numpy" ] }, "id": "144c92821f314ca8b8454fbffa93554f" } }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "!sudo apt-get install espeak-ng" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Bn--f9NnAWx8", "outputId": "6bf40904-ab6f-4271-de47-155f6624993c" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Reading package lists... Done\n", "Building dependency tree... Done\n", "Reading state information... Done\n", "The following additional packages will be installed:\n", " espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n", "The following NEW packages will be installed:\n", " espeak-ng espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n", "0 upgraded, 5 newly installed, 0 to remove and 34 not upgraded.\n", "Need to get 4,526 kB of archives.\n", "After this operation, 11.9 MB of additional disk space will be used.\n", "Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpcaudio0 amd64 1.1-6build2 [8,956 B]\n", "Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libsonic0 amd64 0.2.0-11build1 [10.3 kB]\n", "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 espeak-ng-data amd64 1.50+dfsg-10ubuntu0.1 [3,956 kB]\n", "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libespeak-ng1 amd64 1.50+dfsg-10ubuntu0.1 [207 kB]\n", "Get:5 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 espeak-ng amd64 1.50+dfsg-10ubuntu0.1 [343 kB]\n", "Fetched 4,526 kB in 1s (4,828 kB/s)\n", "debconf: unable to initialize frontend: Dialog\n", "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 5.)\n", "debconf: falling back to frontend: Readline\n", "debconf: unable to initialize frontend: Readline\n", "debconf: (This frontend requires a controlling tty.)\n", "debconf: falling back to frontend: Teletype\n", "dpkg-preconfigure: unable to re-open stdin: \n", "Selecting previously unselected package libpcaudio0:amd64.\n", "(Reading database ... 
126102 files and directories currently installed.)\n", "Preparing to unpack .../libpcaudio0_1.1-6build2_amd64.deb ...\n", "Unpacking libpcaudio0:amd64 (1.1-6build2) ...\n", "Selecting previously unselected package libsonic0:amd64.\n", "Preparing to unpack .../libsonic0_0.2.0-11build1_amd64.deb ...\n", "Unpacking libsonic0:amd64 (0.2.0-11build1) ...\n", "Selecting previously unselected package espeak-ng-data:amd64.\n", "Preparing to unpack .../espeak-ng-data_1.50+dfsg-10ubuntu0.1_amd64.deb ...\n", "Unpacking espeak-ng-data:amd64 (1.50+dfsg-10ubuntu0.1) ...\n", "Selecting previously unselected package libespeak-ng1:amd64.\n", "Preparing to unpack .../libespeak-ng1_1.50+dfsg-10ubuntu0.1_amd64.deb ...\n", "Unpacking libespeak-ng1:amd64 (1.50+dfsg-10ubuntu0.1) ...\n", "Selecting previously unselected package espeak-ng.\n", "Preparing to unpack .../espeak-ng_1.50+dfsg-10ubuntu0.1_amd64.deb ...\n", "Unpacking espeak-ng (1.50+dfsg-10ubuntu0.1) ...\n", "Setting up libpcaudio0:amd64 (1.1-6build2) ...\n", "Setting up libsonic0:amd64 (0.2.0-11build1) ...\n", "Setting up espeak-ng-data:amd64 (1.50+dfsg-10ubuntu0.1) ...\n", "Setting up libespeak-ng1:amd64 (1.50+dfsg-10ubuntu0.1) ...\n", "Setting up espeak-ng (1.50+dfsg-10ubuntu0.1) ...\n", "Processing triggers for man-db (2.10.2-1) ...\n", "Processing triggers for libc-bin (2.35-0ubuntu3.8) ...\n", "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libumf.so.0 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libur_adapter_opencl.so.0 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libhwloc.so.15 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libur_adapter_level_zero.so.0 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtcm_debug.so.1 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtcm.so.1 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libur_loader.so.0 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n", "\n", "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install jiwer" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jBaEWi0m5uNj", "outputId": "07c1ebc8-9b5d-45d2-946f-6cb0217cde07" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting jiwer\n", " Downloading jiwer-3.1.0-py3-none-any.whl.metadata (2.6 kB)\n", "Requirement already satisfied: click>=8.1.8 in /usr/local/lib/python3.11/dist-packages (from jiwer) (8.1.8)\n", "Collecting rapidfuzz>=3.9.7 (from jiwer)\n", " Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", "Downloading jiwer-3.1.0-py3-none-any.whl (22 kB)\n", "Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m38.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: rapidfuzz, jiwer\n", 
"Successfully installed jiwer-3.1.0 rapidfuzz-3.13.0\n" ] } ] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import re\n", "from jiwer import cer" ], "metadata": { "id": "R5exL3ah5yoA" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "def transform_text(text):\n", " \"\"\"\n", " Transform text according to specified rules:\n", " 1. Apply phoneme substitutions\n", " 2. Add question marks before vowels at word boundaries\n", " 3. Remove specific symbols\n", "\n", " Args:\n", " text (str): Input text to transform\n", "\n", " Returns:\n", " str: Transformed text\n", " \"\"\"\n", " # Define the consonant and vowel regex patterns\n", " consonants = ['q', 'r', 't', 'y', 'p', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l',\n", " 'z', 'x', 'c', 'v', 'b', 'n', 'm', 'Q', 'R', 'T', 'Y', 'P', 'S',\n", " 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C', 'V', 'B', 'N', 'M']\n", " vowels = ['a', 'A', 'e', 'i', 'u', 'o']\n", "\n", " consonants_regex = '(?=' + '|'.join(consonants) + ')'\n", " vowels_regex = '(?=' + '|'.join(vowels) + ')'\n", "\n", " # Step 1: Apply phoneme substitutions\n", " substitutions = {\n", " 'tS': 'C',\n", " 'j': 'y',\n", " 'dZ': 'j',\n", " 'R': 'r',\n", " 'q1': 'q'\n", " }\n", "\n", " for old, new in substitutions.items():\n", " text = text.replace(old, new)\n", "\n", " # Step 3: Remove specific symbols\n", " symbols_to_remove = [\"'\", \":\", \",\"]\n", " for symbol in symbols_to_remove:\n", " text = text.replace(symbol, '')\n", "\n", " # Step 2: Add question marks before vowels at word boundaries\n", " text = re.sub(rf'([^\\w\\-]|^){vowels_regex}', r'\\1?', text)\n", "\n", " return text\n" ], "metadata": { "id": "kwOHbGP9FWJs" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import subprocess\n", "\n", "def text_to_phonemes(text, voice='fa', ipa=False, tie_character=None, separator=None):\n", " \"\"\"\n", " Convert text to phonemes using espeak-ng.\n", "\n", " Args:\n", " text (str): Input text to convert to phonemes\n", " voice (str, optional): Voice to use (e.g., 'en-us', 'fr'). Defaults to None (default voice).\n", " ipa (bool, optional): Use International Phonetic Alphabet. Defaults to False.\n", " tie_character (str, optional): Character to join multi-letter phonemes. Defaults to None.\n", " separator (str, optional): Character to separate phonemes. 
Defaults to None.\n", "\n", " Returns:\n", " str: Phoneme representation of the input text\n", " \"\"\"\n", " command = ['espeak-ng', '-q', '-x'] # -q for quiet, -x for phoneme output\n", "\n", " if voice:\n", " command.extend(['-v', voice])\n", " if ipa:\n", " command.append('--ipa')\n", " if tie_character:\n", " command.extend(['--tie', tie_character])\n", " if separator:\n", " command.extend(['--sep', separator])\n", "\n", " # Add the text to process\n", " command.append(text)\n", "\n", " try:\n", " result = subprocess.run(\n", " command,\n", " check=True,\n", " stdout=subprocess.PIPE,\n", " stderr=subprocess.PIPE,\n", " text=True\n", " )\n", " phoneme_seq = result.stdout.strip()\n", " transformed_phoneme_seq = transform_text(phoneme_seq)\n", " return transformed_phoneme_seq\n", " except subprocess.CalledProcessError as e:\n", " raise RuntimeError(f\"espeak-ng failed: {e.stderr}\") from e\n", " except FileNotFoundError as e:\n", " raise RuntimeError(\"espeak-ng is not installed or not in PATH\") from e" ], "metadata": { "id": "E8V116dgBOci" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "XjAPkfq7SF87" }, "source": [ "## Get Evaluation Data" ] }, { "cell_type": "code", "source": [ "!wget https://huggingface.co/datasets/MahtaFetrat/SentenceBench/raw/main/SentenceBench.csv" ], "metadata": { "id": "qwCG0jX-88nQ", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "1564bd7a-c5bd-46a3-fa45-6532fdfc6750" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2025-05-10 20:59:44-- https://huggingface.co/datasets/MahtaFetrat/SentenceBench/raw/main/SentenceBench.csv\n", "Resolving huggingface.co (huggingface.co)... 3.163.189.90, 3.163.189.37, 3.163.189.74, ...\n", "Connecting to huggingface.co (huggingface.co)|3.163.189.90|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 56026 (55K) [text/plain]\n", "Saving to: ‘SentenceBench.csv’\n", "\n", "\rSentenceBench.csv 0%[ ] 0 --.-KB/s \rSentenceBench.csv 100%[===================>] 54.71K --.-KB/s in 0.008s \n", "\n", "2025-05-10 20:59:44 (6.50 MB/s) - ‘SentenceBench.csv’ saved [56026/56026]\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "sentence_bench = pd.read_csv('SentenceBench.csv')" ], "metadata": { "id": "hJO-UAPDQvcb" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "sentence_bench.head(3)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "qlYbrnUa9LAN", "outputId": "ce9a29ac-c6a4-4c8a-b829-97ead674af09" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " dataset grapheme \\\n", "0 homograph من قدر تو را میدانم \n", "1 homograph از قضای الهی به قدر الهی پناه میبرم \n", "2 homograph به دست و صورتم کرم زدم \n", "\n", " phoneme homograph word \\\n", "0 man qadr-e to rA mi-dAnam قدر \n", "1 ?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram قدر \n", "2 be dast-o suratam kerem zadam کرم \n", "\n", " pronunciation \n", "0 qadr \n", "1 qadar \n", "2 kerem " ], "text/html": [ "\n", "
\n", " | dataset | \n", "grapheme | \n", "phoneme | \n", "homograph word | \n", "pronunciation | \n", "
---|---|---|---|---|---|
0 | \n", "homograph | \n", "من قدر تو را میدانم | \n", "man qadr-e to rA mi-dAnam | \n", "قدر | \n", "qadr | \n", "
1 | \n", "homograph | \n", "از قضای الهی به قدر الهی پناه میبرم | \n", "?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram | \n", "قدر | \n", "qadar | \n", "
2 | \n", "homograph | \n", "به دست و صورتم کرم زدم | \n", "be dast-o suratam kerem zadam | \n", "کرم | \n", "kerem | \n", "