Benchmarking notebooks for various Persian G2P models, comparing their performance on the SentenceBench dataset, including Homo-GE2PE and Homo-T5.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Benchmark_eSpeak_NG.ipynb 52KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179
  1. {
  2. "nbformat": 4,
  3. "nbformat_minor": 0,
  4. "metadata": {
  5. "colab": {
  6. "provenance": [],
  7. "collapsed_sections": [
  8. "ABgLYF9R8viP",
  9. "EOZGZa2lMfPe",
  10. "AdU8VMTIOWLZ",
  11. "XhbCA2tkR45b",
  12. "XjAPkfq7SF87",
  13. "f4NqCjr1FxVg",
  14. "oBgNtpFQDwku",
  15. "JGEUIrbi9kNH"
  16. ]
  17. },
  18. "kernelspec": {
  19. "name": "python3",
  20. "display_name": "Python 3"
  21. },
  22. "language_info": {
  23. "name": "python"
  24. }
  25. },
  26. "cells": [
  27. {
  28. "cell_type": "code",
  29. "source": [
  30. "! pip install hazm==0.10.0"
  31. ],
  32. "metadata": {
  33. "id": "KoaoU_iCNhGt",
  34. "colab": {
  35. "base_uri": "https://localhost:8080/",
  36. "height": 1000
  37. },
  38. "outputId": "09c602ac-7664-4e3f-8bcb-32e7337e24ee"
  39. },
  40. "execution_count": null,
  41. "outputs": [
  42. {
  43. "output_type": "stream",
  44. "name": "stdout",
  45. "text": [
  46. "Collecting hazm==0.10.0\n",
  47. " Downloading hazm-0.10.0-py3-none-any.whl.metadata (11 kB)\n",
  48. "Collecting fasttext-wheel<0.10.0,>=0.9.2 (from hazm==0.10.0)\n",
  49. " Downloading fasttext_wheel-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n",
  50. "Collecting flashtext<3.0,>=2.7 (from hazm==0.10.0)\n",
  51. " Downloading flashtext-2.7.tar.gz (14 kB)\n",
  52. " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
  53. "Collecting gensim<5.0.0,>=4.3.1 (from hazm==0.10.0)\n",
  54. " Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)\n",
  55. "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (3.9.1)\n",
  56. "Collecting numpy==1.24.3 (from hazm==0.10.0)\n",
  57. " Downloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n",
  58. "Collecting python-crfsuite<0.10.0,>=0.9.9 (from hazm==0.10.0)\n",
  59. " Downloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)\n",
  60. "Requirement already satisfied: scikit-learn<2.0.0,>=1.2.2 in /usr/local/lib/python3.11/dist-packages (from hazm==0.10.0) (1.6.1)\n",
  61. "Collecting pybind11>=2.2 (from fasttext-wheel<0.10.0,>=0.9.2->hazm==0.10.0)\n",
  62. " Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)\n",
  63. "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from fasttext-wheel<0.10.0,>=0.9.2->hazm==0.10.0) (75.2.0)\n",
  64. "Collecting scipy<1.14.0,>=1.7.0 (from gensim<5.0.0,>=4.3.1->hazm==0.10.0)\n",
  65. " Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n",
  66. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.6/60.6 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  67. "\u001b[?25hRequirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.11/dist-packages (from gensim<5.0.0,>=4.3.1->hazm==0.10.0) (7.1.0)\n",
  68. "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (8.1.8)\n",
  69. "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (1.4.2)\n",
  70. "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (2024.11.6)\n",
  71. "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk<4.0.0,>=3.8.1->hazm==0.10.0) (4.67.1)\n",
  72. "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn<2.0.0,>=1.2.2->hazm==0.10.0) (3.6.0)\n",
  73. "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open>=1.8.1->gensim<5.0.0,>=4.3.1->hazm==0.10.0) (1.17.2)\n",
  74. "Downloading hazm-0.10.0-py3-none-any.whl (892 kB)\n",
  75. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m892.6/892.6 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  76. "\u001b[?25hDownloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n",
  77. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  78. "\u001b[?25hDownloading fasttext_wheel-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n",
  79. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m28.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  80. "\u001b[?25hDownloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)\n",
  81. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.7/26.7 MB\u001b[0m \u001b[31m42.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  82. "\u001b[?25hDownloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
  83. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  84. "\u001b[?25hDownloading pybind11-2.13.6-py3-none-any.whl (243 kB)\n",
  85. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.3/243.3 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  86. "\u001b[?25hDownloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)\n",
  87. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.6/38.6 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  88. "\u001b[?25hBuilding wheels for collected packages: flashtext\n",
  89. " Building wheel for flashtext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
  90. " Created wheel for flashtext: filename=flashtext-2.7-py2.py3-none-any.whl size=9300 sha256=181e31aa2933b81d1d4b23e453d93e170ac703c16bd7cb21b77b97ed66a93296\n",
  91. " Stored in directory: /root/.cache/pip/wheels/49/20/47/f03dfa8a7239c54cbc44ff7389eefbf888d2c1873edaaec888\n",
  92. "Successfully built flashtext\n",
  93. "Installing collected packages: flashtext, python-crfsuite, pybind11, numpy, scipy, fasttext-wheel, gensim, hazm\n",
  94. " Attempting uninstall: numpy\n",
  95. " Found existing installation: numpy 2.0.2\n",
  96. " Uninstalling numpy-2.0.2:\n",
  97. " Successfully uninstalled numpy-2.0.2\n",
  98. " Attempting uninstall: scipy\n",
  99. " Found existing installation: scipy 1.15.2\n",
  100. " Uninstalling scipy-1.15.2:\n",
  101. " Successfully uninstalled scipy-1.15.2\n",
  102. "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
  103. "blosc2 3.3.2 requires numpy>=1.26, but you have numpy 1.24.3 which is incompatible.\n",
  104. "thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.3 which is incompatible.\n",
  105. "treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.3 which is incompatible.\n",
  106. "pymc 5.22.0 requires numpy>=1.25.0, but you have numpy 1.24.3 which is incompatible.\n",
  107. "albumentations 2.0.6 requires numpy>=1.24.4, but you have numpy 1.24.3 which is incompatible.\n",
  108. "albucore 0.0.24 requires numpy>=1.24.4, but you have numpy 1.24.3 which is incompatible.\n",
  109. "tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 1.24.3 which is incompatible.\n",
  110. "jax 0.5.2 requires numpy>=1.25, but you have numpy 1.24.3 which is incompatible.\n",
  111. "jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.24.3 which is incompatible.\u001b[0m\u001b[31m\n",
  112. "\u001b[0mSuccessfully installed fasttext-wheel-0.9.2 flashtext-2.7 gensim-4.3.3 hazm-0.10.0 numpy-1.24.3 pybind11-2.13.6 python-crfsuite-0.9.11 scipy-1.13.1\n"
  113. ]
  114. },
  115. {
  116. "output_type": "display_data",
  117. "data": {
  118. "application/vnd.colab-display-data+json": {
  119. "pip_warning": {
  120. "packages": [
  121. "numpy"
  122. ]
  123. },
  124. "id": "144c92821f314ca8b8454fbffa93554f"
  125. }
  126. },
  127. "metadata": {}
  128. }
  129. ]
  130. },
  131. {
  132. "cell_type": "code",
  133. "source": [
  134. "!sudo apt-get install espeak-ng"
  135. ],
  136. "metadata": {
  137. "colab": {
  138. "base_uri": "https://localhost:8080/"
  139. },
  140. "id": "Bn--f9NnAWx8",
  141. "outputId": "6bf40904-ab6f-4271-de47-155f6624993c"
  142. },
  143. "execution_count": null,
  144. "outputs": [
  145. {
  146. "output_type": "stream",
  147. "name": "stdout",
  148. "text": [
  149. "Reading package lists... Done\n",
  150. "Building dependency tree... Done\n",
  151. "Reading state information... Done\n",
  152. "The following additional packages will be installed:\n",
  153. " espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n",
  154. "The following NEW packages will be installed:\n",
  155. " espeak-ng espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n",
  156. "0 upgraded, 5 newly installed, 0 to remove and 34 not upgraded.\n",
  157. "Need to get 4,526 kB of archives.\n",
  158. "After this operation, 11.9 MB of additional disk space will be used.\n",
  159. "Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpcaudio0 amd64 1.1-6build2 [8,956 B]\n",
  160. "Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libsonic0 amd64 0.2.0-11build1 [10.3 kB]\n",
  161. "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 espeak-ng-data amd64 1.50+dfsg-10ubuntu0.1 [3,956 kB]\n",
  162. "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libespeak-ng1 amd64 1.50+dfsg-10ubuntu0.1 [207 kB]\n",
  163. "Get:5 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 espeak-ng amd64 1.50+dfsg-10ubuntu0.1 [343 kB]\n",
  164. "Fetched 4,526 kB in 1s (4,828 kB/s)\n",
  165. "debconf: unable to initialize frontend: Dialog\n",
  166. "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 5.)\n",
  167. "debconf: falling back to frontend: Readline\n",
  168. "debconf: unable to initialize frontend: Readline\n",
  169. "debconf: (This frontend requires a controlling tty.)\n",
  170. "debconf: falling back to frontend: Teletype\n",
  171. "dpkg-preconfigure: unable to re-open stdin: \n",
  172. "Selecting previously unselected package libpcaudio0:amd64.\n",
  173. "(Reading database ... 126102 files and directories currently installed.)\n",
  174. "Preparing to unpack .../libpcaudio0_1.1-6build2_amd64.deb ...\n",
  175. "Unpacking libpcaudio0:amd64 (1.1-6build2) ...\n",
  176. "Selecting previously unselected package libsonic0:amd64.\n",
  177. "Preparing to unpack .../libsonic0_0.2.0-11build1_amd64.deb ...\n",
  178. "Unpacking libsonic0:amd64 (0.2.0-11build1) ...\n",
  179. "Selecting previously unselected package espeak-ng-data:amd64.\n",
  180. "Preparing to unpack .../espeak-ng-data_1.50+dfsg-10ubuntu0.1_amd64.deb ...\n",
  181. "Unpacking espeak-ng-data:amd64 (1.50+dfsg-10ubuntu0.1) ...\n",
  182. "Selecting previously unselected package libespeak-ng1:amd64.\n",
  183. "Preparing to unpack .../libespeak-ng1_1.50+dfsg-10ubuntu0.1_amd64.deb ...\n",
  184. "Unpacking libespeak-ng1:amd64 (1.50+dfsg-10ubuntu0.1) ...\n",
  185. "Selecting previously unselected package espeak-ng.\n",
  186. "Preparing to unpack .../espeak-ng_1.50+dfsg-10ubuntu0.1_amd64.deb ...\n",
  187. "Unpacking espeak-ng (1.50+dfsg-10ubuntu0.1) ...\n",
  188. "Setting up libpcaudio0:amd64 (1.1-6build2) ...\n",
  189. "Setting up libsonic0:amd64 (0.2.0-11build1) ...\n",
  190. "Setting up espeak-ng-data:amd64 (1.50+dfsg-10ubuntu0.1) ...\n",
  191. "Setting up libespeak-ng1:amd64 (1.50+dfsg-10ubuntu0.1) ...\n",
  192. "Setting up espeak-ng (1.50+dfsg-10ubuntu0.1) ...\n",
  193. "Processing triggers for man-db (2.10.2-1) ...\n",
  194. "Processing triggers for libc-bin (2.35-0ubuntu3.8) ...\n",
  195. "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
  196. "\n",
  197. "/sbin/ldconfig.real: /usr/local/lib/libumf.so.0 is not a symbolic link\n",
  198. "\n",
  199. "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
  200. "\n",
  201. "/sbin/ldconfig.real: /usr/local/lib/libur_adapter_opencl.so.0 is not a symbolic link\n",
  202. "\n",
  203. "/sbin/ldconfig.real: /usr/local/lib/libhwloc.so.15 is not a symbolic link\n",
  204. "\n",
  205. "/sbin/ldconfig.real: /usr/local/lib/libur_adapter_level_zero.so.0 is not a symbolic link\n",
  206. "\n",
  207. "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
  208. "\n",
  209. "/sbin/ldconfig.real: /usr/local/lib/libtcm_debug.so.1 is not a symbolic link\n",
  210. "\n",
  211. "/sbin/ldconfig.real: /usr/local/lib/libtcm.so.1 is not a symbolic link\n",
  212. "\n",
  213. "/sbin/ldconfig.real: /usr/local/lib/libur_loader.so.0 is not a symbolic link\n",
  214. "\n",
  215. "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
  216. "\n",
  217. "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
  218. "\n",
  219. "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
  220. "\n"
  221. ]
  222. }
  223. ]
  224. },
  225. {
  226. "cell_type": "code",
  227. "source": [
  228. "!pip install jiwer"
  229. ],
  230. "metadata": {
  231. "colab": {
  232. "base_uri": "https://localhost:8080/"
  233. },
  234. "id": "jBaEWi0m5uNj",
  235. "outputId": "07c1ebc8-9b5d-45d2-946f-6cb0217cde07"
  236. },
  237. "execution_count": null,
  238. "outputs": [
  239. {
  240. "output_type": "stream",
  241. "name": "stdout",
  242. "text": [
  243. "Collecting jiwer\n",
  244. " Downloading jiwer-3.1.0-py3-none-any.whl.metadata (2.6 kB)\n",
  245. "Requirement already satisfied: click>=8.1.8 in /usr/local/lib/python3.11/dist-packages (from jiwer) (8.1.8)\n",
  246. "Collecting rapidfuzz>=3.9.7 (from jiwer)\n",
  247. " Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
  248. "Downloading jiwer-3.1.0-py3-none-any.whl (22 kB)\n",
  249. "Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n",
  250. "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m38.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  251. "\u001b[?25hInstalling collected packages: rapidfuzz, jiwer\n",
  252. "Successfully installed jiwer-3.1.0 rapidfuzz-3.13.0\n"
  253. ]
  254. }
  255. ]
  256. },
  257. {
  258. "cell_type": "code",
  259. "source": [
  260. "import pandas as pd\n",
  261. "import re\n",
  262. "from jiwer import cer"
  263. ],
  264. "metadata": {
  265. "id": "R5exL3ah5yoA"
  266. },
  267. "execution_count": null,
  268. "outputs": []
  269. },
  270. {
  271. "cell_type": "code",
  272. "source": [
  273. "def transform_text(text):\n",
  274. " \"\"\"\n",
  275. " Transform text according to specified rules:\n",
  276. " 1. Apply phoneme substitutions\n",
  277. " 2. Add question marks before vowels at word boundaries\n",
  278. " 3. Remove specific symbols\n",
  279. "\n",
  280. " Args:\n",
  281. " text (str): Input text to transform\n",
  282. "\n",
  283. " Returns:\n",
  284. " str: Transformed text\n",
  285. " \"\"\"\n",
  286. " # Define the consonant and vowel regex patterns\n",
  287. " consonants = ['q', 'r', 't', 'y', 'p', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l',\n",
  288. " 'z', 'x', 'c', 'v', 'b', 'n', 'm', 'Q', 'R', 'T', 'Y', 'P', 'S',\n",
  289. " 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C', 'V', 'B', 'N', 'M']\n",
  290. " vowels = ['a', 'A', 'e', 'i', 'u', 'o']\n",
  291. "\n",
  292. " consonants_regex = '(?=' + '|'.join(consonants) + ')'\n",
  293. " vowels_regex = '(?=' + '|'.join(vowels) + ')'\n",
  294. "\n",
  295. " # Step 1: Apply phoneme substitutions\n",
  296. " substitutions = {\n",
  297. " 'tS': 'C',\n",
  298. " 'j': 'y',\n",
  299. " 'dZ': 'j',\n",
  300. " 'R': 'r',\n",
  301. " 'q1': 'q'\n",
  302. " }\n",
  303. "\n",
  304. " for old, new in substitutions.items():\n",
  305. " text = text.replace(old, new)\n",
  306. "\n",
  307. " # Step 3: Remove specific symbols\n",
  308. " symbols_to_remove = [\"'\", \":\", \",\"]\n",
  309. " for symbol in symbols_to_remove:\n",
  310. " text = text.replace(symbol, '')\n",
  311. "\n",
  312. " # Step 2: Add question marks before vowels at word boundaries\n",
  313. " text = re.sub(rf'([^\\w\\-]|^){vowels_regex}', r'\\1?', text)\n",
  314. "\n",
  315. " return text\n"
  316. ],
  317. "metadata": {
  318. "id": "kwOHbGP9FWJs"
  319. },
  320. "execution_count": null,
  321. "outputs": []
  322. },
  323. {
  324. "cell_type": "code",
  325. "source": [
  326. "import subprocess\n",
  327. "\n",
  328. "def text_to_phonemes(text, voice='fa', ipa=False, tie_character=None, separator=None):\n",
  329. " \"\"\"\n",
  330. " Convert text to phonemes using espeak-ng.\n",
  331. "\n",
  332. " Args:\n",
  333. " text (str): Input text to convert to phonemes\n",
  334. " voice (str, optional): Voice to use (e.g., 'en-us', 'fr'). Defaults to None (default voice).\n",
  335. " ipa (bool, optional): Use International Phonetic Alphabet. Defaults to False.\n",
  336. " tie_character (str, optional): Character to join multi-letter phonemes. Defaults to None.\n",
  337. " separator (str, optional): Character to separate phonemes. Defaults to None.\n",
  338. "\n",
  339. " Returns:\n",
  340. " str: Phoneme representation of the input text\n",
  341. " \"\"\"\n",
  342. " command = ['espeak-ng', '-q', '-x'] # -q for quiet, -x for phoneme output\n",
  343. "\n",
  344. " if voice:\n",
  345. " command.extend(['-v', voice])\n",
  346. " if ipa:\n",
  347. " command.append('--ipa')\n",
  348. " if tie_character:\n",
  349. " command.extend(['--tie', tie_character])\n",
  350. " if separator:\n",
  351. " command.extend(['--sep', separator])\n",
  352. "\n",
  353. " # Add the text to process\n",
  354. " command.append(text)\n",
  355. "\n",
  356. " try:\n",
  357. " result = subprocess.run(\n",
  358. " command,\n",
  359. " check=True,\n",
  360. " stdout=subprocess.PIPE,\n",
  361. " stderr=subprocess.PIPE,\n",
  362. " text=True\n",
  363. " )\n",
  364. " phoneme_seq = result.stdout.strip()\n",
  365. " transformed_phoneme_seq = transform_text(phoneme_seq)\n",
  366. " return transformed_phoneme_seq\n",
  367. " except subprocess.CalledProcessError as e:\n",
  368. " raise RuntimeError(f\"espeak-ng failed: {e.stderr}\") from e\n",
  369. " except FileNotFoundError as e:\n",
  370. " raise RuntimeError(\"espeak-ng is not installed or not in PATH\") from e"
  371. ],
  372. "metadata": {
  373. "id": "E8V116dgBOci"
  374. },
  375. "execution_count": null,
  376. "outputs": []
  377. },
  378. {
  379. "cell_type": "markdown",
  380. "metadata": {
  381. "id": "XjAPkfq7SF87"
  382. },
  383. "source": [
  384. "## Get Evaluation Data"
  385. ]
  386. },
  387. {
  388. "cell_type": "code",
  389. "source": [
  390. "!wget https://huggingface.co/datasets/MahtaFetrat/SentenceBench/raw/main/SentenceBench.csv"
  391. ],
  392. "metadata": {
  393. "id": "qwCG0jX-88nQ",
  394. "colab": {
  395. "base_uri": "https://localhost:8080/"
  396. },
  397. "outputId": "1564bd7a-c5bd-46a3-fa45-6532fdfc6750"
  398. },
  399. "execution_count": null,
  400. "outputs": [
  401. {
  402. "output_type": "stream",
  403. "name": "stdout",
  404. "text": [
  405. "--2025-05-10 20:59:44-- https://huggingface.co/datasets/MahtaFetrat/SentenceBench/raw/main/SentenceBench.csv\n",
  406. "Resolving huggingface.co (huggingface.co)... 3.163.189.90, 3.163.189.37, 3.163.189.74, ...\n",
  407. "Connecting to huggingface.co (huggingface.co)|3.163.189.90|:443... connected.\n",
  408. "HTTP request sent, awaiting response... 200 OK\n",
  409. "Length: 56026 (55K) [text/plain]\n",
  410. "Saving to: ‘SentenceBench.csv’\n",
  411. "\n",
  412. "\rSentenceBench.csv 0%[ ] 0 --.-KB/s \rSentenceBench.csv 100%[===================>] 54.71K --.-KB/s in 0.008s \n",
  413. "\n",
  414. "2025-05-10 20:59:44 (6.50 MB/s) - ‘SentenceBench.csv’ saved [56026/56026]\n",
  415. "\n"
  416. ]
  417. }
  418. ]
  419. },
  420. {
  421. "cell_type": "code",
  422. "source": [
  423. "sentence_bench = pd.read_csv('SentenceBench.csv')"
  424. ],
  425. "metadata": {
  426. "id": "hJO-UAPDQvcb"
  427. },
  428. "execution_count": null,
  429. "outputs": []
  430. },
  431. {
  432. "cell_type": "code",
  433. "source": [
  434. "sentence_bench.head(3)"
  435. ],
  436. "metadata": {
  437. "colab": {
  438. "base_uri": "https://localhost:8080/",
  439. "height": 143
  440. },
  441. "id": "qlYbrnUa9LAN",
  442. "outputId": "ce9a29ac-c6a4-4c8a-b829-97ead674af09"
  443. },
  444. "execution_count": null,
  445. "outputs": [
  446. {
  447. "output_type": "execute_result",
  448. "data": {
  449. "text/plain": [
  450. " dataset grapheme \\\n",
  451. "0 homograph من قدر تو را می‌دانم \n",
  452. "1 homograph از قضای الهی به قدر الهی پناه می‌برم \n",
  453. "2 homograph به دست و صورتم کرم زدم \n",
  454. "\n",
  455. " phoneme homograph word \\\n",
  456. "0 man qadr-e to rA mi-dAnam قدر \n",
  457. "1 ?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram قدر \n",
  458. "2 be dast-o suratam kerem zadam کرم \n",
  459. "\n",
  460. " pronunciation \n",
  461. "0 qadr \n",
  462. "1 qadar \n",
  463. "2 kerem "
  464. ],
  465. "text/html": [
  466. "\n",
  467. " <div id=\"df-0f3443f9-2364-4c95-ad21-e297d127156b\" class=\"colab-df-container\">\n",
  468. " <div>\n",
  469. "<style scoped>\n",
  470. " .dataframe tbody tr th:only-of-type {\n",
  471. " vertical-align: middle;\n",
  472. " }\n",
  473. "\n",
  474. " .dataframe tbody tr th {\n",
  475. " vertical-align: top;\n",
  476. " }\n",
  477. "\n",
  478. " .dataframe thead th {\n",
  479. " text-align: right;\n",
  480. " }\n",
  481. "</style>\n",
  482. "<table border=\"1\" class=\"dataframe\">\n",
  483. " <thead>\n",
  484. " <tr style=\"text-align: right;\">\n",
  485. " <th></th>\n",
  486. " <th>dataset</th>\n",
  487. " <th>grapheme</th>\n",
  488. " <th>phoneme</th>\n",
  489. " <th>homograph word</th>\n",
  490. " <th>pronunciation</th>\n",
  491. " </tr>\n",
  492. " </thead>\n",
  493. " <tbody>\n",
  494. " <tr>\n",
  495. " <th>0</th>\n",
  496. " <td>homograph</td>\n",
  497. " <td>من قدر تو را می‌دانم</td>\n",
  498. " <td>man qadr-e to rA mi-dAnam</td>\n",
  499. " <td>قدر</td>\n",
  500. " <td>qadr</td>\n",
  501. " </tr>\n",
  502. " <tr>\n",
  503. " <th>1</th>\n",
  504. " <td>homograph</td>\n",
  505. " <td>از قضای الهی به قدر الهی پناه می‌برم</td>\n",
  506. " <td>?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram</td>\n",
  507. " <td>قدر</td>\n",
  508. " <td>qadar</td>\n",
  509. " </tr>\n",
  510. " <tr>\n",
  511. " <th>2</th>\n",
  512. " <td>homograph</td>\n",
  513. " <td>به دست و صورتم کرم زدم</td>\n",
  514. " <td>be dast-o suratam kerem zadam</td>\n",
  515. " <td>کرم</td>\n",
  516. " <td>kerem</td>\n",
  517. " </tr>\n",
  518. " </tbody>\n",
  519. "</table>\n",
  520. "</div>\n",
  521. " <div class=\"colab-df-buttons\">\n",
  522. "\n",
  523. " <div class=\"colab-df-container\">\n",
  524. " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0f3443f9-2364-4c95-ad21-e297d127156b')\"\n",
  525. " title=\"Convert this dataframe to an interactive table.\"\n",
  526. " style=\"display:none;\">\n",
  527. "\n",
  528. " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
  529. " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
  530. " </svg>\n",
  531. " </button>\n",
  532. "\n",
  533. " <style>\n",
  534. " .colab-df-container {\n",
  535. " display:flex;\n",
  536. " gap: 12px;\n",
  537. " }\n",
  538. "\n",
  539. " .colab-df-convert {\n",
  540. " background-color: #E8F0FE;\n",
  541. " border: none;\n",
  542. " border-radius: 50%;\n",
  543. " cursor: pointer;\n",
  544. " display: none;\n",
  545. " fill: #1967D2;\n",
  546. " height: 32px;\n",
  547. " padding: 0 0 0 0;\n",
  548. " width: 32px;\n",
  549. " }\n",
  550. "\n",
  551. " .colab-df-convert:hover {\n",
  552. " background-color: #E2EBFA;\n",
  553. " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
  554. " fill: #174EA6;\n",
  555. " }\n",
  556. "\n",
  557. " .colab-df-buttons div {\n",
  558. " margin-bottom: 4px;\n",
  559. " }\n",
  560. "\n",
  561. " [theme=dark] .colab-df-convert {\n",
  562. " background-color: #3B4455;\n",
  563. " fill: #D2E3FC;\n",
  564. " }\n",
  565. "\n",
  566. " [theme=dark] .colab-df-convert:hover {\n",
  567. " background-color: #434B5C;\n",
  568. " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
  569. " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
  570. " fill: #FFFFFF;\n",
  571. " }\n",
  572. " </style>\n",
  573. "\n",
  574. " <script>\n",
  575. " const buttonEl =\n",
  576. " document.querySelector('#df-0f3443f9-2364-4c95-ad21-e297d127156b button.colab-df-convert');\n",
  577. " buttonEl.style.display =\n",
  578. " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
  579. "\n",
  580. " async function convertToInteractive(key) {\n",
  581. " const element = document.querySelector('#df-0f3443f9-2364-4c95-ad21-e297d127156b');\n",
  582. " const dataTable =\n",
  583. " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
  584. " [key], {});\n",
  585. " if (!dataTable) return;\n",
  586. "\n",
  587. " const docLinkHtml = 'Like what you see? Visit the ' +\n",
  588. " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
  589. " + ' to learn more about interactive tables.';\n",
  590. " element.innerHTML = '';\n",
  591. " dataTable['output_type'] = 'display_data';\n",
  592. " await google.colab.output.renderOutput(dataTable, element);\n",
  593. " const docLink = document.createElement('div');\n",
  594. " docLink.innerHTML = docLinkHtml;\n",
  595. " element.appendChild(docLink);\n",
  596. " }\n",
  597. " </script>\n",
  598. " </div>\n",
  599. "\n",
  600. "\n",
  601. " <div id=\"df-eed350e1-3b88-4a95-a1a7-ede78c415da5\">\n",
  602. " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-eed350e1-3b88-4a95-a1a7-ede78c415da5')\"\n",
  603. " title=\"Suggest charts\"\n",
  604. " style=\"display:none;\">\n",
  605. "\n",
  606. "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
  607. " width=\"24px\">\n",
  608. " <g>\n",
  609. " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
  610. " </g>\n",
  611. "</svg>\n",
  612. " </button>\n",
  613. "\n",
  614. "<style>\n",
  615. " .colab-df-quickchart {\n",
  616. " --bg-color: #E8F0FE;\n",
  617. " --fill-color: #1967D2;\n",
  618. " --hover-bg-color: #E2EBFA;\n",
  619. " --hover-fill-color: #174EA6;\n",
  620. " --disabled-fill-color: #AAA;\n",
  621. " --disabled-bg-color: #DDD;\n",
  622. " }\n",
  623. "\n",
  624. " [theme=dark] .colab-df-quickchart {\n",
  625. " --bg-color: #3B4455;\n",
  626. " --fill-color: #D2E3FC;\n",
  627. " --hover-bg-color: #434B5C;\n",
  628. " --hover-fill-color: #FFFFFF;\n",
  629. " --disabled-bg-color: #3B4455;\n",
  630. " --disabled-fill-color: #666;\n",
  631. " }\n",
  632. "\n",
  633. " .colab-df-quickchart {\n",
  634. " background-color: var(--bg-color);\n",
  635. " border: none;\n",
  636. " border-radius: 50%;\n",
  637. " cursor: pointer;\n",
  638. " display: none;\n",
  639. " fill: var(--fill-color);\n",
  640. " height: 32px;\n",
  641. " padding: 0;\n",
  642. " width: 32px;\n",
  643. " }\n",
  644. "\n",
  645. " .colab-df-quickchart:hover {\n",
  646. " background-color: var(--hover-bg-color);\n",
  647. " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
        "      fill: var(--hover-fill-color);\n",
  649. " }\n",
  650. "\n",
  651. " .colab-df-quickchart-complete:disabled,\n",
  652. " .colab-df-quickchart-complete:disabled:hover {\n",
  653. " background-color: var(--disabled-bg-color);\n",
  654. " fill: var(--disabled-fill-color);\n",
  655. " box-shadow: none;\n",
  656. " }\n",
  657. "\n",
  658. " .colab-df-spinner {\n",
  659. " border: 2px solid var(--fill-color);\n",
  660. " border-color: transparent;\n",
  661. " border-bottom-color: var(--fill-color);\n",
  662. " animation:\n",
  663. " spin 1s steps(1) infinite;\n",
  664. " }\n",
  665. "\n",
  666. " @keyframes spin {\n",
  667. " 0% {\n",
  668. " border-color: transparent;\n",
  669. " border-bottom-color: var(--fill-color);\n",
  670. " border-left-color: var(--fill-color);\n",
  671. " }\n",
  672. " 20% {\n",
  673. " border-color: transparent;\n",
  674. " border-left-color: var(--fill-color);\n",
  675. " border-top-color: var(--fill-color);\n",
  676. " }\n",
  677. " 30% {\n",
  678. " border-color: transparent;\n",
  679. " border-left-color: var(--fill-color);\n",
  680. " border-top-color: var(--fill-color);\n",
  681. " border-right-color: var(--fill-color);\n",
  682. " }\n",
  683. " 40% {\n",
  684. " border-color: transparent;\n",
  685. " border-right-color: var(--fill-color);\n",
  686. " border-top-color: var(--fill-color);\n",
  687. " }\n",
  688. " 60% {\n",
  689. " border-color: transparent;\n",
  690. " border-right-color: var(--fill-color);\n",
  691. " }\n",
  692. " 80% {\n",
  693. " border-color: transparent;\n",
  694. " border-right-color: var(--fill-color);\n",
  695. " border-bottom-color: var(--fill-color);\n",
  696. " }\n",
  697. " 90% {\n",
  698. " border-color: transparent;\n",
  699. " border-bottom-color: var(--fill-color);\n",
  700. " }\n",
  701. " }\n",
  702. "</style>\n",
  703. "\n",
  704. " <script>\n",
  705. " async function quickchart(key) {\n",
  706. " const quickchartButtonEl =\n",
  707. " document.querySelector('#' + key + ' button');\n",
  708. " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
  709. " quickchartButtonEl.classList.add('colab-df-spinner');\n",
  710. " try {\n",
  711. " const charts = await google.colab.kernel.invokeFunction(\n",
  712. " 'suggestCharts', [key], {});\n",
  713. " } catch (error) {\n",
  714. " console.error('Error during call to suggestCharts:', error);\n",
  715. " }\n",
  716. " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
  717. " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
  718. " }\n",
  719. " (() => {\n",
  720. " let quickchartButtonEl =\n",
  721. " document.querySelector('#df-eed350e1-3b88-4a95-a1a7-ede78c415da5 button');\n",
  722. " quickchartButtonEl.style.display =\n",
  723. " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
  724. " })();\n",
  725. " </script>\n",
  726. " </div>\n",
  727. " </div>\n",
  728. " </div>\n"
  729. ],
  730. "application/vnd.google.colaboratory.intrinsic+json": {
  731. "type": "dataframe",
  732. "variable_name": "sentence_bench",
  733. "summary": "{\n \"name\": \"sentence_bench\",\n \"rows\": 400,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"homograph\",\n \"mana-tts\",\n \"commonvoice\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"grapheme\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 400,\n \"samples\": [\n \"\\u0622\\u06cc\\u0627 \\u0628\\u0627\\u06cc\\u062f \\u062d\\u0642\\u06cc\\u0642\\u062a \\u0631\\u0627 \\u0628\\u0647 \\u0622\\u0646\\u200c\\u0647\\u0627 \\u0628\\u06af\\u0648\\u06cc\\u06cc\\u0645\\u061f\",\n \"\\u06a9\\u0647 \\u067e\\u06cc\\u0634 \\u0627\\u0632 \\u0627\\u0646\\u0642\\u0644\\u0627\\u0628 \\u0628\\u0647 \\u062e\\u0648\\u0627\\u0628\\u06af\\u0627\\u0647 \\u062f\\u062e\\u062a\\u0631\\u0627\\u0646 \\u0648 \\u0632\\u0646\\u0627\\u0646 \\u0646\\u0627\\u0628\\u06cc\\u0646\\u0627 \\u0627\\u062e\\u062a\\u0635\\u0627\\u0635\\u200c\\u06cc\\u0627\\u0641\\u062a\\u0647 \\u0628\\u0648\\u062f. 
\\u0627\\u063a\\u0644\\u0628 \\u0632\\u0646\\u0627\\u0646\\u06cc \\u06a9\\u0647 \\u062f\\u0631 \\u0627\\u06cc\\u0646 \\u062e\\u0648\\u0627\\u0628\\u06af\\u0627\\u0647 \\u0632\\u0646\\u062f\\u06af\\u06cc \\u0645\\u06cc\\u200c\\u06a9\\u0631\\u062f\\u0646\\u062f\\u060c \",\n \"\\u062f\\u0648\\u062f \\u0648 \\u0645\\u0647 \\u063a\\u0644\\u06cc\\u0638\\u06cc \\u062f\\u0631 \\u0645\\u062d\\u06cc\\u0637 \\u067e\\u06cc\\u0686\\u06cc\\u062f\\u0647 \\u0628\\u0648\\u062f\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"phoneme\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 400,\n \"samples\": [\n \"?AyA bAyad haqiqat rA be ?AnhA beguyim\\u061f\",\n \"ke piS ?az ?enqelAb be xAbgAh-e doxtarAn va zanAn-e nAbinA ?extesAsyAfte bud ?aqlab-e zanAni ke dar ?in xAbgAh zendegi mikardand\",\n \"dud-o meh-e qalizi dar mohit piCide bud\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"homograph word\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 101,\n \"samples\": [\n \"\\u06af\\u0631\\u06cc\\u0645\",\n \"\\u0633\\u0628\\u06a9\\u06cc\",\n \"\\u06a9\\u0645\\u06cc\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pronunciation\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 210,\n \"samples\": [\n \"darham\",\n \"Sum\",\n \"moSk\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
  734. }
  735. },
  736. "metadata": {},
  737. "execution_count": 8
  738. }
  739. ]
  740. },
  741. {
  742. "cell_type": "markdown",
  743. "metadata": {
  744. "id": "wDV7ysXf2b_H"
  745. },
  746. "source": [
  747. "### Get ManaTTS"
  748. ]
  749. },
  750. {
  751. "cell_type": "code",
  752. "execution_count": null,
  753. "metadata": {
  754. "colab": {
  755. "base_uri": "https://localhost:8080/"
  756. },
  757. "id": "TcL5ZLvSSnVB",
  758. "outputId": "66b99ca9-442a-46c9-81d6-49cf7a35d08a"
  759. },
  760. "outputs": [
  761. {
  762. "output_type": "execute_result",
  763. "data": {
  764. "text/plain": [
  765. "[('در این نوشته بنا داریم با یک ابزار ساده و مکانیکی افزایش بینایی برای افراد کم\\u200cبینا ',\n",
  766. " 'dar ?in neveSte banA dArim bA yek ?abzAr-e sAde va mekAniki-ye ?afzAyeS-e binAyi barAye ?afrAd-e kam\\u200cbinA '),\n",
  767. " ('به نام بی\\u200cوپتیک یا عدسی دورنما آشنا شویم. ',\n",
  768. " 'be nAm-e biyoptik yA ?adasi-ye durnamA ?ASnA Savim'),\n",
  769. " ('دراین\\u200cصورت، انجام خودارزیابی و ارائه بازخورد بر عهده خودتان است. ',\n",
  770. " 'dar ?in surat ?anjAm-e xod?arzyAbi va ?erA?e-ye bAzxord bar ?ohde-ye xodetAn ?ast ')]"
  771. ]
  772. },
  773. "metadata": {},
  774. "execution_count": 9
  775. }
  776. ],
  777. "source": [
  778. "filtered_rows = sentence_bench[sentence_bench['dataset'] == 'mana-tts'][['grapheme', 'phoneme']]\n",
  779. "\n",
  780. "# Convert to a list of tuples\n",
  781. "mana_evaluation_data = list(filtered_rows.itertuples(index=False, name=None))\n",
  782. "\n",
  783. "mana_evaluation_data[:3]"
  784. ]
  785. },
  786. {
  787. "cell_type": "markdown",
  788. "metadata": {
  789. "id": "Jjacw9Mp2eoX"
  790. },
  791. "source": [
  792. "### Get CommonVoice"
  793. ]
  794. },
  795. {
  796. "cell_type": "code",
  797. "execution_count": null,
  798. "metadata": {
  799. "id": "-yQnqCGw26sk",
  800. "colab": {
  801. "base_uri": "https://localhost:8080/"
  802. },
  803. "outputId": "7f587b1d-f55c-41e9-d4ff-59a381a7e4cd"
  804. },
  805. "outputs": [
  806. {
  807. "output_type": "execute_result",
  808. "data": {
  809. "text/plain": [
  810. "[('در اکثر شهرها، مرکزی برای خرید دوچرخه وجود دارد.',\n",
  811. " 'dar ?aksar-e Sahr-hA, markazi barAye xarid-e doCarxe vojud dArad.'),\n",
  812. " ('پس از مدرسه کودکان به سوی خانه جست و خیز کردند.',\n",
  813. " 'pas ?az madrese kudakAn be suye xAne jast-o-xiz kardand.'),\n",
  814. " ('شما نگران زن و بچه این نباش.', 'SomA negarAn-e zan-o-baCCe-ye ?in nabAS.')]"
  815. ]
  816. },
  817. "metadata": {},
  818. "execution_count": 10
  819. }
  820. ],
  821. "source": [
  822. "filtered_rows = sentence_bench[sentence_bench['dataset'] == 'commonvoice'][['grapheme', 'phoneme']]\n",
  823. "\n",
  824. "# Convert to a list of tuples\n",
  825. "commonvoice_evaluation_data = list(filtered_rows.itertuples(index=False, name=None))\n",
  826. "\n",
  827. "commonvoice_evaluation_data[:3]"
  828. ]
  829. },
  830. {
  831. "cell_type": "markdown",
  832. "metadata": {
  833. "id": "ciSPyhRc3Rvo"
  834. },
  835. "source": [
  836. "### Get Homograph"
  837. ]
  838. },
  839. {
  840. "cell_type": "code",
  841. "execution_count": null,
  842. "metadata": {
  843. "id": "XlFc5JbN3Rvz",
  844. "colab": {
  845. "base_uri": "https://localhost:8080/"
  846. },
  847. "outputId": "fb6b313f-3d1b-4110-9a11-230c07d07c3d"
  848. },
  849. "outputs": [
  850. {
  851. "output_type": "execute_result",
  852. "data": {
  853. "text/plain": [
  854. "[('من قدر تو را می\\u200cدانم', 'man qadr-e to rA mi-dAnam', 'قدر', 'qadr'),\n",
  855. " ('از قضای الهی به قدر الهی پناه می\\u200cبرم',\n",
  856. " '?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram',\n",
  857. " 'قدر',\n",
  858. " 'qadar'),\n",
  859. " ('به دست و صورتم کرم زدم', 'be dast-o suratam kerem zadam', 'کرم', 'kerem')]"
  860. ]
  861. },
  862. "metadata": {},
  863. "execution_count": 11
  864. }
  865. ],
  866. "source": [
  867. "filtered_rows = sentence_bench[sentence_bench['dataset'] == 'homograph'][['grapheme', 'phoneme', 'homograph word',\t'pronunciation']]\n",
  868. "\n",
  869. "# Convert to a list of tuples\n",
  870. "homograph_evaluation_data = list(filtered_rows.itertuples(index=False, name=None))\n",
  871. "\n",
  872. "homograph_evaluation_data[:3]"
  873. ]
  874. },
  875. {
  876. "cell_type": "markdown",
  877. "metadata": {
  878. "id": "R6PE5ds45TPr"
  879. },
  880. "source": [
  881. "# Evaluate Method Outputs"
  882. ]
  883. },
  884. {
  885. "cell_type": "markdown",
  886. "metadata": {
  887. "id": "y73zFlRGIbt9"
  888. },
  889. "source": [
  890. "## PER Evaluation"
  891. ]
  892. },
  893. {
  894. "cell_type": "code",
  895. "execution_count": null,
  896. "metadata": {
  897. "id": "ItuviO3w5Vzv"
  898. },
  899. "outputs": [],
  900. "source": [
  901. "def remove_non_word_chars(text):\n",
  902. " pattern = r'[^\\w\\s\\?]'\n",
  903. " cleaned_text = re.sub(pattern, '', text)\n",
  904. " cleaned_text = re.sub('_', '', text)\n",
  905. " return cleaned_text"
  906. ]
  907. },
  908. {
  909. "cell_type": "code",
  910. "execution_count": null,
  911. "metadata": {
  912. "id": "syQCurXu51TO"
  913. },
  914. "outputs": [],
  915. "source": [
  916. "def remove_white_spaces(text):\n",
  917. " cleaned_text = re.sub(r'\\s+', ' ', text)\n",
  918. " return cleaned_text.strip()"
  919. ]
  920. },
  921. {
  922. "cell_type": "code",
  923. "execution_count": null,
  924. "metadata": {
  925. "id": "V7APkVM053RP"
  926. },
  927. "outputs": [],
  928. "source": [
  929. "def get_word_only_text(text):\n",
  930. " word_only_text = remove_non_word_chars(text)\n",
  931. " extra_space_removed_text = remove_white_spaces(word_only_text)\n",
  932. "\n",
  933. " return extra_space_removed_text"
  934. ]
  935. },
  936. {
  937. "cell_type": "code",
  938. "execution_count": null,
  939. "metadata": {
  940. "id": "ROomKSao57vy"
  941. },
  942. "outputs": [],
  943. "source": [
  944. "def get_texts_cer(reference, model_output):\n",
  945. " # Preprocess input texts to only contain word characters\n",
  946. " word_only_reference = get_word_only_text(reference)\n",
  947. " word_only_output = get_word_only_text(model_output)\n",
  948. "\n",
  949. " # Return +infinity for CER if any of the texts is empty\n",
  950. " if not word_only_reference.strip() or not word_only_output.strip():\n",
  951. " return float('inf')\n",
  952. "\n",
  953. " return cer(word_only_reference, word_only_output)"
  954. ]
  955. },
  956. {
  957. "cell_type": "code",
  958. "execution_count": null,
  959. "metadata": {
  960. "id": "4vHLUjp48hc3"
  961. },
  962. "outputs": [],
  963. "source": [
  964. "def get_avg_cer_of_method(method_outputs, references):\n",
  965. " cers = []\n",
  966. " for idx, o in enumerate(method_outputs):\n",
  967. " cer = get_texts_cer(o.replace('-', ''), references[idx][1].replace('-', ''))\n",
  968. " if cer != float('inf'):\n",
  969. " cers.append(cer)\n",
  970. "\n",
  971. " return sum(cers) / len(cers)"
  972. ]
  973. },
  974. {
  975. "cell_type": "markdown",
  976. "metadata": {
  977. "id": "oBgNtpFQDwku"
  978. },
  979. "source": [
  980. "## Homograph Evaluation"
  981. ]
  982. },
  983. {
  984. "cell_type": "code",
  985. "execution_count": null,
  986. "metadata": {
  987. "id": "J445ULEvEEDn"
  988. },
  989. "outputs": [],
  990. "source": [
  991. "def get_homograph_performance(outputs, references):\n",
  992. " corrects = 0\n",
  993. " total = 0\n",
  994. "\n",
  995. " for idx, (g, p, homograph, right) in enumerate(references):\n",
  996. " if homograph != '':\n",
  997. " total += 1\n",
  998. " if right in outputs[idx]:\n",
  999. " corrects += 1\n",
  1000. "\n",
  1001. " return corrects / total"
  1002. ]
  1003. },
  1004. {
  1005. "cell_type": "markdown",
  1006. "metadata": {
  1007. "id": "JGEUIrbi9kNH"
  1008. },
  1009. "source": [
  1010. "# Full bench"
  1011. ]
  1012. },
  1013. {
  1014. "cell_type": "code",
  1015. "execution_count": null,
  1016. "metadata": {
  1017. "id": "fGzQvL8V9mln"
  1018. },
  1019. "outputs": [],
  1020. "source": [
  1021. "benchmark = []\n",
  1022. "\n",
  1023. "for g, p in mana_evaluation_data:\n",
  1024. " benchmark.append((g, p, '', ''))\n",
  1025. "\n",
  1026. "for g, p in commonvoice_evaluation_data:\n",
  1027. " benchmark.append((g, p, '', ''))\n",
  1028. "\n",
  1029. "for g, p, w, r in homograph_evaluation_data:\n",
  1030. " benchmark.append((g, p, w, r))\n",
  1031. "\n",
  1032. "benchmark = benchmark[:400]"
  1033. ]
  1034. },
  1035. {
  1036. "cell_type": "code",
  1037. "execution_count": null,
  1038. "metadata": {
  1039. "id": "4jlXFt8tCPWB"
  1040. },
  1041. "outputs": [],
  1042. "source": [
  1043. "def print_all_metrics(predictions):\n",
  1044. " per = get_avg_cer_of_method(predictions, benchmark) * 100\n",
  1045. " homograph = get_homograph_performance(predictions, benchmark) * 100\n",
  1046. "\n",
  1047. " print(f\"PER: \\t\\t\\t{per:.4f}\")\n",
  1048. " print(f\"HOMOGRAPH: \\t\\t{homograph:.4f}\")"
  1049. ]
  1050. },
  1051. {
  1052. "cell_type": "markdown",
  1053. "source": [
  1054. "# Inference"
  1055. ],
  1056. "metadata": {
  1057. "id": "fTRgGM_8_Fwg"
  1058. }
  1059. },
  1060. {
  1061. "cell_type": "code",
  1062. "source": [
  1063. "import time"
  1064. ],
  1065. "metadata": {
  1066. "id": "owk5yIrGd5mE"
  1067. },
  1068. "execution_count": null,
  1069. "outputs": []
  1070. },
  1071. {
  1072. "cell_type": "code",
  1073. "source": [
  1074. "start_time = time.time()\n",
  1075. "\n",
  1076. "mapped_outputs = [text_to_phonemes(item[0]) for item in benchmark]\n",
  1077. "\n",
  1078. "total_time = time.time() - start_time\n",
  1079. "avg_time = total_time / len(benchmark) if len(benchmark) > 0 else 0"
  1080. ],
  1081. "metadata": {
  1082. "id": "8xzXN4KWOBWX"
  1083. },
  1084. "execution_count": null,
  1085. "outputs": []
  1086. },
  1087. {
  1088. "cell_type": "markdown",
  1089. "source": [
  1090. "# Results"
  1091. ],
  1092. "metadata": {
  1093. "id": "JAIAobLFCKCr"
  1094. }
  1095. },
  1096. {
  1097. "cell_type": "code",
  1098. "source": [
  1099. "print_all_metrics(mapped_outputs)\n",
  1100. "print(f\"TOTAL TIME:\\t\\t{total_time:.2f} (s)\")\n",
  1101. "print(f\"AVG TIME:\\t\\t{avg_time:.4f} (s)+\")"
  1102. ],
  1103. "metadata": {
  1104. "id": "E3OB9-tCWT7I",
  1105. "colab": {
  1106. "base_uri": "https://localhost:8080/"
  1107. },
  1108. "outputId": "8e57d011-ed0b-4d4e-defe-1ed45e9ff56d"
  1109. },
  1110. "execution_count": null,
  1111. "outputs": [
  1112. {
  1113. "output_type": "stream",
  1114. "name": "stdout",
  1115. "text": [
  1116. "PER: \t\t\t6.9152\n",
  1117. "HOMOGRAPH: \t\t43.8679\n",
  1118. "TOTAL TIME:\t\t6.82 (s)\n",
  1119. "AVG TIME:\t\t0.0170 (s)+\n"
  1120. ]
  1121. }
  1122. ]
  1123. },
  1124. {
  1125. "cell_type": "markdown",
  1126. "source": [
  1127. "# Runs\n",
  1128. "\n",
  1129. "## First:\n",
  1130. "\n",
  1131. "```\n",
  1132. "PER: \t\t\t6.9152\n",
  1133. "HOMOGRAPH: \t\t43.8679\n",
  1134. "TOTAL TIME:\t\t6.58 (s)\n",
  1135. "AVG TIME:\t\t0.0165 (s)+\n",
  1136. "```\n",
  1137. "\n",
  1138. "## Second\n",
  1139. "\n",
  1140. "```\n",
  1141. "PER: \t\t\t6.9152\n",
  1142. "HOMOGRAPH: \t\t43.8679\n",
  1143. "TOTAL TIME:\t\t6.67 (s)\n",
  1144. "AVG TIME:\t\t0.0167 (s)+\n",
  1145. "```\n",
  1146. "\n",
  1147. "## Third\n",
  1148. "\n",
  1149. "```\n",
  1150. "PER: \t\t\t6.9152\n",
  1151. "HOMOGRAPH: \t\t43.8679\n",
  1152. "TOTAL TIME:\t\t6.65 (s)\n",
  1153. "AVG TIME:\t\t0.0166 (s)+\n",
  1154. "```\n",
  1155. "\n",
  1156. "## Fourth\n",
  1157. "\n",
  1158. "```\n",
  1159. "PER: \t\t\t6.9152\n",
  1160. "HOMOGRAPH: \t\t43.8679\n",
  1161. "TOTAL TIME:\t\t7.16 (s)\n",
  1162. "AVG TIME:\t\t0.0179 (s)+\n",
  1163. "```\n",
  1164. "\n",
  1165. "## Fifth\n",
  1166. "\n",
  1167. "```\n",
  1168. "PER: \t\t\t6.9152\n",
  1169. "HOMOGRAPH: \t\t43.8679\n",
  1170. "TOTAL TIME:\t\t6.82 (s)\n",
  1171. "AVG TIME:\t\t0.0170 (s)+\n",
  1172. "```"
  1173. ],
  1174. "metadata": {
  1175. "id": "1ImhTHJY7bYr"
  1176. }
  1177. }
  1178. ]
  1179. }