""" This module implements phone error metrics based on the work from ginic/phone_errors. Original implementation: https://huggingface.co/spaces/ginic/phone_errors Citation: @inproceedings{Mortensen-et-al:2016, author = {David R. Mortensen and Patrick Littell and Akash Bharadwaj and Kartik Goyal and Chris Dyer and Lori S. Levin}, title = {PanPhon: {A} Resource for Mapping {IPA} Segments to Articulatory Feature Vectors}, booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers}, pages = {3475--3484}, publisher = {{ACL}}, year = {2016} } """ import numpy as np import panphon.distance class PhoneErrorMetrics: def __init__(self, feature_model: str = "segment"): """Initialize the phone error metrics calculator. Args: feature_model (str): panphon feature parsing model ("strict", "permissive", or "segment") """ self.distance_computer = panphon.distance.Distance(feature_model=feature_model) def _phone_error_rate(self, prediction: str, reference: str) -> float: """Compute phone error rate between prediction and reference. Args: prediction (str): Predicted IPA string reference (str): Reference IPA string Returns: float: Phone error rate """ if not reference: raise ValueError("Reference string cannot be empty") pred_phones = self.distance_computer.fm.ipa_segs(prediction) ref_phones = self.distance_computer.fm.ipa_segs(reference) phone_edits = self.distance_computer.min_edit_distance( lambda x: 1, # deletion cost lambda x: 1, # insertion cost lambda x, y: 0 if x == y else 1, # substitution cost [[]], pred_phones, ref_phones, ) return phone_edits / len(ref_phones) def compute( self, predictions: list[str], references: list[str], is_normalize_pfer: bool = False, ) -> dict: """Compute phone error metrics between predictions and references. Args: predictions (List[str]): List of predicted IPA strings references (List[str]): List of reference IPA strings is_normalize_pfer (bool): Whether to normalize phone feature error rates Returns: Dict containing: - phone_error_rates: List of PER for each pair - mean_phone_error_rate: Average PER - phone_feature_error_rates: List of PFER for each pair - mean_phone_feature_error_rate: Average PFER - feature_error_rates: List of FER for each pair - mean_feature_error_rate: Average FER """ phone_error_rates = [] feature_error_rates = [] hamming_distances = [] for pred, ref in zip(predictions, references): if is_normalize_pfer: hd = self.distance_computer.hamming_feature_edit_distance_div_maxlen( pred, ref ) else: hd = self.distance_computer.hamming_feature_edit_distance(pred, ref) hamming_distances.append(hd) per = self._phone_error_rate(pred, ref) phone_error_rates.append(per) fer = self.distance_computer.feature_error_rate(pred, ref) feature_error_rates.append(fer) return { "phone_error_rates": phone_error_rates, "mean_phone_error_rate": float(np.mean(phone_error_rates)), "phone_feature_error_rates": hamming_distances, "mean_phone_feature_error_rate": float(np.mean(hamming_distances)), "feature_error_rates": feature_error_rates, "mean_feature_error_rate": float(np.mean(feature_error_rates)), }