# Setup Environment

In [None]:
! pip install PersianG2p

Collecting PersianG2p
  Downloading PersianG2p-0.3.2-py3-none-any.whl.metadata (6.5 kB)
Collecting hazm (from PersianG2p)
  Downloading hazm-0.10.0-py3-none-any.whl.metadata (11 kB)
Collecting num2fawords (from PersianG2p)
  Downloading num2fawords-1.1-py3-none-any.whl.metadata (4.1 kB)
Collecting fasttext-wheel<0.10.0,>=0.9.2 (from hazm->PersianG2p)
  Downloading fasttext_wheel-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting flashtext<3.0,>=2.7 (from hazm->PersianG2p)
  Downloading flashtext-2.7.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gensim<5.0.0,>=4.3.1 (from hazm->PersianG2p)
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting numpy (from PersianG2p)
  Downloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting python-crfsuite<0.10.0,>=0.9.9 (from hazm->PersianG2p)
  Downloading python

In [None]:
! pip install jiwer

Collecting jiwer
  Downloading jiwer-3.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting rapidfuzz>=3.9.7 (from jiwer)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading jiwer-3.1.0-py3-none-any.whl (22 kB)
Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-3.1.0 rapidfuzz-3.13.0


In [None]:
import csv
import itertools
import json
import os
import re
import time

import pandas as pd
from jiwer import cer
from tqdm import tqdm

# Setup Model

In [None]:
from PersianG2p import Persian_g2p_converter

# Single converter instance; every transliterate call in this notebook goes through it.
PersianG2Pconverter = Persian_g2p_converter()

In [None]:
# Smoke test with tidy=False: the result shown below uses the ASCII symbol 'A'.
PersianG2Pconverter.transliterate('دلم میخواست برم ', tidy = False, secret = True)

'dalam mixAst beram'

In [None]:
# Same sentence with tidy=True: the result shown below has 'ā' where tidy=False gave 'A'.
PersianG2Pconverter.transliterate('دلم میخواست برم ', tidy = True, secret = True)

'dalam mixāst beram'

In [None]:
# Longer sentence that includes punctuation, transliterated with tidy=False.
PersianG2Pconverter.transliterate('انجمن نابینایان برای افرادی که تمایل به شنیدن مجله‌ی نسل مانا را دارند، این امکان را فراهم کرده‌است.', tidy = False, secret = True)

'anjoman nAbinA?An barA^ye efrAdi ke tamAyol be Senidan majele?i nasl mAnA rA dArand ، in emkAn rA farAham kerdedest .'

# Mapping

In [None]:
# Character-level normalisation table: maps symbols that may appear in a G2P
# output to the phoneme alphabet used by this notebook. It is the default
# `char_map` of replace_phonetic_characters, which hands it to str.maketrans,
# so every key must be exactly one character long.
# Several keys are written more than once; in a dict literal the last
# occurrence of a key is the one that takes effect.
output_to_phonetics_map = {
    'м': 'm',
    'ʷ':' v',  # NOTE(review): the value has a leading space, so a space is inserted before 'v' - confirm this is intended
    'w': 'v',
    'c': 'k',
    'ĉ': 'C',
    'č': 'C',
    '̕': "?",
    "'": '?',
    'ʔ': "?",
    'ꞌ': "?",
    '̛':  "?",
    '’': "?",
    'ʼ': "?",
    "'": '?',
    'â': 'A',
    'â': 'A',
    'ȃ': 'A',
    'ž': 'Z',
    'š': 'S',
    'W': 'v',
    'β': 'f',
    'е': 'e',
    '`': "?",
    'ɑ': 'A',
    'ɑ': 'A',
    'ʃ': 'S',
    'ð': 'z',
    'ɾ': 'r',
    'æ': 'a',
    'ɪ': 'e',
    'χ': 'x',
    'ɣ': 'q',
    'ʒ': 'Z',
    ':': '',
    'ː': '',
    'ā': 'A',
    'ː': '',
    'ä': 'A',
    'á': 'A',
    'š': 'S',
    'ū': 'u',
    'û': 'u',
    'ś': 's',  # NOTE(review): a later 'ś' entry maps to 'S'; if it is the same character, that later value wins
    'ī': 'i',
    'í': 'i',
    'î': 'i',
    'é': 'e',
    'ḥ': 'h',
    'ɒ': 'A',
    'ʰ': '',
    'ə': 'e',
    'R': 'r',
    'W': 'v',
    'Q': 'q',
    'T': 't',
    'Y': 'y',
    'P': 'p',
    'D': 'd',
    'F': 'f',
    'H': 'h',
    'J': 'j',
    'L': 'l',
    'X': 'x',
    'V': 'v',
    'B': 'b',
    'N': 'n',
    'M': 'm',
    'K': 'k',
    'G': 'g',
    'U': 'u',
    'O': 'o',
    'I': 'i',
    'E': 'e',
    'ŋ': 'ng',
    '.': '',
    'ɛ': 'e',
    'ʊ': 'u',
    "ˈ": '?',
    'ù': 'u',
    'θ': 's',
    '̪': '',
    'ũ': 'u',
    '_': '',
    'ç': 'C',
    'ĝ': 'q',
    'ɢ': 'q',
    'ː': '',
    'í': 'i',
    'ŝ': 'S',
    '!': '',
    'ǧ': 'q',
    'ʻ': '?',
    'è': 'e',
    '�': '',
    'ú': 'u',
    'ô': 'o',
    'ē': 'e',
    'à': 'A',
    'ă': 'A',
    'ǐ': 'i',
    'ü': 'u',
    '\u200e': '',
    'ğ': 'q',
    'ṣ': 'S',
    'â': 'A',
    'â': 'A',
    'ȃ': 'A',
    'ž': 'Z',
    'š': 'S',
    'ā': 'A',
    'ː': '',
    'ä': 'A',
    'á': 'A',
    'š': 'S',
    'ū': 'u',
    'û': 'u',
    'ś': 'S',  # NOTE(review): conflicts with the earlier 'ś': 's' entry if both keys are the same character
    'ī': 'i',
    'í': 'i',
    'î': 'i',
    'é': 'e',
}

# Lookahead patterns that match at a position followed by any one of the listed
# consonant / vowel symbols.
# NOTE(review): neither pattern is referenced anywhere else in the visible notebook.
consonants_regex = '(?=' + '|'.join(['q', 'r', 't', 'y', 'p', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'z', 'x', 'c', 'v', 'b', 'n', 'm', 'Q', 'R', 'T', 'Y', 'P', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C', 'V', 'B', 'N', 'M' ]) + ')'
vowels_regex = '(?=' + '|'.join(['a', 'A', 'e', 'i', 'u', 'o']) + ')'


def replace_phonetic_characters(input_string, char_map=output_to_phonetics_map, from_phonetics=False):
    """Normalise a phonetic transcription to this notebook's phoneme alphabet.

    Multi-character sequences (affricates and the ``ow`` diphthong) are
    rewritten first, because ``str.translate`` only maps single characters.
    The remaining characters are then mapped through ``char_map``.

    Args:
        input_string: Transcription to normalise.
        char_map: Mapping from single characters to replacement strings,
            passed to ``str.maketrans``. Defaults to
            ``output_to_phonetics_map``.
        from_phonetics: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        The normalised transcription.
    """
    # Longer sequences come before their prefixes so that, for example,
    # 'tʃʰ' is consumed as a whole before the plain 'tʃ' rule runs.
    multi_char_rules = (
        ('tʃʰ', 'C'),
        ('t͡ʃ', 'C'),
        ('tʃ', 'C'),
        ('t͡S', 'C'),
        ('ow', 'o'),
        ('d͡ʒ', 'j'),
        ('dʒ', 'j'),
    )

    substituted = input_string
    for sequence, replacement in multi_char_rules:
        # Every rule is applied to the result of the previous one, so no
        # substitution is lost along the way.
        substituted = substituted.replace(sequence, replacement)

    # Single-character replacements are done in one pass by str.translate.
    return substituted.translate(str.maketrans(char_map))

# Get Evaluation Data

In [None]:
!wget https://huggingface.co/datasets/MahtaFetrat/SentenceBench/raw/main/SentenceBench.csv

--2025-05-10 11:45:09--  https://huggingface.co/datasets/MahtaFetrat/SentenceBench/raw/main/SentenceBench.csv
Resolving huggingface.co (huggingface.co)... 13.226.52.8, 13.226.52.35, 13.226.52.100, ...
Connecting to huggingface.co (huggingface.co)|13.226.52.8|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 56026 (55K) [text/plain]
Saving to: ‘SentenceBench.csv’


2025-05-10 11:45:09 (4.13 MB/s) - ‘SentenceBench.csv’ saved [56026/56026]



In [None]:
# Load the downloaded benchmark. The preview below shows an 'Unnamed: 0'
# column next to dataset / grapheme / phoneme / homograph word / pronunciation.
# NOTE(review): 'Unnamed: 0' looks like a saved row index - consider index_col=0.
sentence_bench = pd.read_csv('SentenceBench.csv')

In [None]:
sentence_bench.head(3)

Unnamed: 0,dataset,grapheme,phoneme,homograph word,pronunciation
0,homograph,من قدر تو را می‌دانم,man qadr-e to rA mi-dAnam,قدر,qadr
1,homograph,از قضای الهی به قدر الهی پناه می‌برم,?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram,قدر,qadar
2,homograph,به دست و صورتم کرم زدم,be dast-o suratam kerem zadam,کرم,kerem


### Get ManaTTS

In [None]:
# Rows of the 'mana-tts' subset, reduced to the two text columns.
filtered_rows = sentence_bench.loc[
    sentence_bench['dataset'] == 'mana-tts',
    ['grapheme', 'phoneme'],
]

# One (grapheme, phoneme) tuple per row.
mana_evaluation_data = list(zip(filtered_rows['grapheme'], filtered_rows['phoneme']))

mana_evaluation_data[:3]

[('در این نوشته بنا داریم با یک ابزار ساده و مکانیکی افزایش بینایی برای افراد کم\u200cبینا ',
  'dar ?in neveSte banA dArim bA yek ?abzAr-e sAde va mekAniki-ye ?afzAyeS-e binAyi barAye ?afrAd-e kam\u200cbinA '),
 ('به نام بی\u200cوپتیک یا عدسی دورنما آشنا شویم. ',
  'be nAm-e biyoptik yA ?adasi-ye durnamA ?ASnA Savim'),
 ('دراین\u200cصورت، انجام خودارزیابی و ارائه بازخورد بر عهده خودتان است. ',
  'dar ?in surat ?anjAm-e xod?arzyAbi va ?erA?e-ye bAzxord bar ?ohde-ye xodetAn ?ast ')]

### Get CommonVoice

In [None]:
# Rows of the 'commonvoice' subset, reduced to the two text columns.
filtered_rows = sentence_bench.loc[
    sentence_bench['dataset'] == 'commonvoice',
    ['grapheme', 'phoneme'],
]

# One (grapheme, phoneme) tuple per row.
commonvoice_evaluation_data = list(zip(filtered_rows['grapheme'], filtered_rows['phoneme']))

commonvoice_evaluation_data[:3]

[('در اکثر شهرها، مرکزی برای خرید دوچرخه وجود دارد.',
  'dar ?aksar-e Sahr-hA, markazi barAye xarid-e  doCarxe vojud dArad.'),
 ('پس از مدرسه کودکان به سوی خانه جست و خیز کردند.',
  'pas ?az madrese kudakAn be suye xAne jast-o-xiz kardand.'),
 ('شما نگران زن و بچه این نباش.', 'SomA negarAn-e zan-o-baCCe-ye ?in nabAS.')]

### Get Homograph

In [None]:
# Rows of the 'homograph' subset, with the homograph word and its expected
# pronunciation kept alongside the two text columns.
filtered_rows = sentence_bench.loc[
    sentence_bench['dataset'] == 'homograph',
    ['grapheme', 'phoneme', 'homograph word', 'pronunciation'],
]

# One (grapheme, phoneme, homograph word, pronunciation) tuple per row.
homograph_evaluation_data = list(
    zip(
        filtered_rows['grapheme'],
        filtered_rows['phoneme'],
        filtered_rows['homograph word'],
        filtered_rows['pronunciation'],
    )
)

homograph_evaluation_data[:3]

[('من قدر تو را می\u200cدانم', 'man qadr-e to rA mi-dAnam', 'قدر', 'qadr'),
 ('از قضای الهی به قدر الهی پناه می\u200cبرم',
  '?az qazAy ?elAhi be qadar-e ?elAhi panAh mi-baram',
  'قدر',
  'qadar'),
 ('به دست و صورتم کرم زدم', 'be dast-o suratam kerem zadam', 'کرم', 'kerem')]

# Evaluate Method Outputs

## PER Evaluation

In [None]:
def remove_non_word_chars(text):
    """Replace every character that is not a word character, whitespace or '?' with a space."""
    return re.sub(r'[^\w\s\?]', ' ', text)

In [None]:
def remove_white_spaces(text):
    """Collapse each run of whitespace into one space and trim both ends."""
    return re.sub(r'\s+', ' ', text).strip()

In [None]:
def get_word_only_text(text):
  """Return ``text`` with non-word characters blanked out and whitespace normalised."""
  return remove_white_spaces(remove_non_word_chars(text))

In [None]:
def get_texts_cer(reference, model_output):
  """Character error rate between a reference and a model output.

  Both texts are first reduced to their word-only form with
  ``get_word_only_text``.

  Returns:
    ``cer(reference, model_output)`` computed on the cleaned texts, or
    ``float('inf')`` when either cleaned text is empty.
  """
  cleaned_reference = get_word_only_text(reference)
  cleaned_output = get_word_only_text(model_output)

  # An empty side cannot be scored; report it as +infinity.
  both_non_empty = cleaned_reference.strip() and cleaned_output.strip()
  if not both_non_empty:
    return float('inf')

  return cer(cleaned_reference, cleaned_output)

In [None]:
def get_avg_cer_of_method(method_outputs, references):
  """Average character error rate of a method's outputs.

  Args:
    method_outputs: Predicted phoneme strings, index-aligned with
      ``references``.
    references: Sequence of tuples whose element at index 1 is the reference
      phoneme string.

  Returns:
    The mean CER over all pairs with a finite score, or ``float('inf')`` when
    no pair could be scored.
  """
  scores = []
  for idx, output in enumerate(method_outputs):
    # Hyphens are dropped from both sides before scoring.
    reference = references[idx][1].replace('-', '')
    hypothesis = output.replace('-', '')

    # get_texts_cer takes (reference, model_output). The error rate is measured
    # relative to the reference, so the argument order matters.
    score = get_texts_cer(reference, hypothesis)
    if score != float('inf'):
      scores.append(score)

  # Nothing scorable: follow get_texts_cer's convention instead of dividing by zero.
  if not scores:
    return float('inf')

  return sum(scores) / len(scores)

## Homograph Evaluation

In [None]:
def get_homograph_performance(outputs, references):
  """Fraction of homograph entries whose expected pronunciation occurs in the output.

  Args:
    outputs: Predicted phoneme strings, index-aligned with ``references``.
    references: Sequence of ``(grapheme, phoneme, homograph, pronunciation)``
      tuples. Entries whose ``homograph`` is the empty string are skipped.

  Returns:
    ``corrects / total`` over the homograph entries, or ``0.0`` when there
    are no homograph entries at all.
  """
  corrects = 0
  total = 0

  for idx, (_grapheme, _phoneme, homograph, right) in enumerate(references):
    if homograph == '':
      continue
    total += 1
    # Substring check: the expected pronunciation only has to occur somewhere
    # in the predicted sentence.
    if right in outputs[idx]:
      corrects += 1

  # Avoid a ZeroDivisionError when the references hold no homograph entries.
  if total == 0:
    return 0.0

  return corrects / total

# Full bench

In [None]:
# Every entry has the shape (grapheme, phoneme, homograph word, pronunciation).
# The ManaTTS and CommonVoice sentences carry empty strings in the last two slots.
benchmark = (
    [(g, p, '', '') for g, p in mana_evaluation_data]
    + [(g, p, '', '') for g, p in commonvoice_evaluation_data]
    + [(g, p, w, r) for g, p, w, r in homograph_evaluation_data]
)

# Keep at most the first 400 entries.
benchmark = benchmark[:400]

In [None]:
def print_all_metrics(predictions):
  """Print PER and homograph accuracy of ``predictions`` on ``benchmark``, both scaled by 100."""
  per = 100 * get_avg_cer_of_method(predictions, benchmark)
  homograph = 100 * get_homograph_performance(predictions, benchmark)

  report_lines = (
      f"PER: \t\t\t{per:.4f}",
      f"HOMOGRAPH: \t\t{homograph:.4f}",
  )
  for line in report_lines:
    print(line)

# Outputs

In [None]:
# Transliterate every benchmark sentence and time the whole pass.
# `tqdm` and `time` come from the notebook's import cell.
outputs = []
# perf_counter is the clock intended for measuring elapsed intervals; unlike
# time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()

for g, _, _, _ in tqdm(benchmark):
    outputs.append(PersianG2Pconverter.transliterate(g, tidy=False, secret=True))

total_time = time.perf_counter() - start_time
# Guard against an empty benchmark when averaging.
avg_time = total_time / len(benchmark) if benchmark else 0

100%|██████████| 400/400 [15:20<00:00,  2.30s/it]


In [None]:
# Normalise each raw transliteration to the benchmark's phoneme alphabet.
mapped_outputs = [replace_phonetic_characters(raw_output) for raw_output in outputs]

In [None]:
# Report accuracy on the normalised outputs, then the timings measured by the
# transliteration loop.
print_all_metrics(mapped_outputs)
print(f"TOTAL TIME:\t\t{total_time:.4f} (s)")
print(f"AVG TIME:\t\t{avg_time:.4f} (s)")

PER: 			15.0414
HOMOGRAPH: 		37.7358
TOTAL TIME:		920.0853 (s)
AVG TIME:		2.3002 (s)


# Runs

## First

```
PER: 			15.0414
HOMOGRAPH: 		37.7358
TOTAL TIME:		874.3154 (s)
AVG TIME:		2.1858 (s)
```

## Second

```
PER: 			15.0414
HOMOGRAPH: 		37.7358
TOTAL TIME:		814.6596 (s)
AVG TIME:		2.0366 (s)
```

## Third

```
PER: 			15.0414
HOMOGRAPH: 		37.7358
TOTAL TIME:		845.8805 (s)
AVG TIME:		2.1147 (s)
```

## Fourth

```
PER: 			15.0414
HOMOGRAPH: 		37.7358
TOTAL TIME:		882.1829 (s)
AVG TIME:		2.2055 (s)
```

## Fifth

```
PER: 			15.0414
HOMOGRAPH: 		37.7358
TOTAL TIME:		920.0853 (s)
AVG TIME:		2.3002 (s)
```