# Source code for IPA.es_pron

# pylint: disable=anomalous-backslash-in-string
# pylint: disable=line-too-long, invalid-name
"""Generate Spanish IPA from spelling. Implements the template {{es-IPA}}.
Partially adapted from the Lua module https://en.wiktionary.org/wiki/Module:es-pronunc.
"""

from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import regex as re


def _sub_table(pattern, table, text):
    """Replace every match of ``pattern`` in ``text`` by its ``table`` entry.

    A match that has no entry in ``table`` is left unchanged instead of
    raising ``KeyError``.
    """
    return re.sub(pattern, lambda m: table.get(m.group(), m.group()), text)


def to_IPA(word, LatinAmerica=False, phonetic=True):
    """Generates Spanish IPA from spelling.

    Implements template `{{es-IPA}}`_.

    .. _{{es-IPA}}: https://en.wiktionary.org/wiki/Template:es-IPA

    Parameters
    ----------
    word : string
        String of es-IPA text parsed in `{{es-IPA}}`_ from Wiktionary.
        It is lower-cased, and every character other than ``a-z``,
        ``áéíóúüñ`` and ``.`` is removed before conversion. A ``.`` acts
        as an explicit syllable break.
    LatinAmerica : bool
        Value of ``|LatinAmerica=`` parameter parsed in `{{es-IPA}}`_.
        When true, "c" before e/i and "z" come out as "s", and "ll" is
        treated like consonantal "y"; otherwise "θ" and "ʎ" are used.
    phonetic : bool
        Value of ``|phonetic=`` parameter parsed in `{{es-IPA}}`_.
        When true, the allophonic rules (nasalized vowels, voicing,
        lenition, assimilation, semivowel marks) are applied as well.

    Returns
    -------
    string
        Converted Spanish IPA.

    Notes
    -----
    - Modified from `Wiktionary es-pronunc Lua module`_ partially.
    - Testcases are modified from `Wiktionary es-pronunc/testcases`_.
    - A "y" that none of the y-rules rewrites (e.g. in "yyb") is kept
      as is instead of raising ``KeyError``.

    .. _Wiktionary es-pronunc Lua module: https://en.wiktionary.org/wiki/Module:es-pronunc
    .. _Wiktionary es-pronunc/testcases: https://en.wiktionary.org/wiki/Module:es-pronunc/testcases

    Examples
    --------
    >>> es_text = "baca" # es: [[baca]]
    >>> es_IPA = es_pron.to_IPA(es_text)
    >>> es_IPA
    "ˈbaka"
    """
    word = word.lower()
    word = re.sub("[^abcdefghijklmnopqrstuvwxyzáéíóúüñ.]", "", word)

    # determining whether "y" is a consonant or a vowel + diphthongs, "-mente" suffix
    word = re.sub("y([^aeiouáéíóú])", r"i\1", word)
    word = re.sub("y([aeiouáéíóú])", r"ɟ\1", word)  # not the real sound
    word = re.sub("hi([aeiouáéíóú])", r"ɟ\1", word)
    word = re.sub("y$", "ï", word)
    word = re.sub("mente$", "ménte", word)

    # x
    word = re.sub("x", "ks", word)

    # "c" & "g" before "i" and "e" and all that stuff
    soft_c = "s" if LatinAmerica else "θ"
    word = re.sub("c([ieíé])", soft_c + r"\1", word)
    word = re.sub("gü([ieíé])", r"ɡw\1", word)
    word = re.sub("ü", "", word)
    word = re.sub("gu([ieíé])", r"ɡ\1", word)
    word = re.sub("g([ieíé])", r"x\1", word)

    # alphabet-to-phoneme
    word = re.sub("qu", "c", word)
    word = re.sub("v", "b", word)
    word = re.sub("ch", "ʃ", word)  # not the real sound
    # ['g']='ɡ': U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
    # "y" is in the class but not in the table: a leftover "y" stays unchanged.
    word = _sub_table(
        "[cgjñry]",
        {"c": "k", "g": "ɡ", "j": "x", "ñ": "ɲ", "r": "ɾ"},
        word
    )

    # trill instead of tap: word-initially, for "rr", after l/n, before a consonant
    word = re.sub("^ɾ", "r", word)
    word = re.sub("ɾɾ", "r", word)
    word = re.sub("lɾ", "lr", word)
    word = re.sub("nɾ", "nr", word)
    word = re.sub("ɾ([bdfɡklʎmnɲpstxzʃɟ])", r"r\1", word)
    word = re.sub("n([bm])", r"m\1", word)

    if LatinAmerica:
        word = re.sub("ll", "ɟ", word)
        # "z" stays as a placeholder here; it becomes "s" after syllabification
    else:
        word = re.sub("ll", "ʎ", word)
        word = re.sub("z", "θ", word)

    # syllable division
    # Each rule runs twice because re.sub matches do not overlap: the final
    # vowel of one match may be the first vowel of the next cluster.
    for _ in range(2):
        word = re.sub("([aeiouáéíóú])([^aeiouáéíóú.])([aeiouáéíóú])", r"\1.\2\3", word)
    for _ in range(2):
        word = re.sub("([aeiouáéíóú])([^aeiouáéíóú.])([^aeiouáéíóú.])([aeiouáéíóú])", r"\1\2.\3\4", word)
    for _ in range(2):
        word = re.sub("([aeiouáéíóú])([^aeiouáéíóú.])([^aeiouáéíóú.])([^aeiouáéíóú.])([aeiouáéíóú])", r"\1\2.\3\4\5", word)
    # stop + liquid stays together in the onset; "s" after a consonant closes the coda
    word = re.sub(r"([pbktdɡ])\.([lɾ])", r".\1\2", word)
    word = re.sub(r"([^aeiouáéíóú.])\.s([^aeiouáéíóú.])", r"\1s.\2", word)
    # hiatus between two strong/accented vowels and between identical close vowels
    word = re.sub("([aeoáéíóú])([aeoáéíóú])", r"\1.\2", word)
    word = re.sub("([ií])([ií])", r"\1.\2", word)
    word = re.sub("([uú])([uú])", r"\1.\2", word)

    # diphthongs
    word = re.sub("ih?([aeouáéóú])", r"j\1", word)
    word = re.sub("uh?([aeioáéíó])", r"w\1", word)

    # accentuation
    syllables = word.split(".")
    if re.search("[áéíóú]", word):
        # written accents mark the stressed syllable(s)
        syllables = [
            "ˈ" + syl if re.search("[áéíóú]", syl) else syl
            for syl in syllables
        ]
    elif re.search("[^aeiouns]$", word):
        # ends in a consonant other than n/s: stress the last syllable
        syllables[-1] = "ˈ" + syllables[-1]
    elif len(syllables) > 1:
        # ends in a vowel, n or s: stress the penultimate syllable
        syllables[-2] = "ˈ" + syllables[-2]

    # syllables nasalized if ending with "n", voiceless consonants in syllable-final position to voiced
    processed = []
    for syl in syllables:
        syl = _sub_table(
            "[áéíóú]",
            {"á": "a", "é": "e", "í": "i", "ó": "o", "ú": "u"},
            syl
        )
        if phonetic and re.search("[mnɲ][^aeiou]?$", syl):
            syl = _sub_table(
                "([aeiou])",
                {"a": "ã", "e": "ẽ", "i": "ĩ", "o": "õ", "u": "ũ"},
                syl
            )
        syl = _sub_table("[ptk]$", {"p": "b", "t": "d", "k": "ɡ"}, syl)
        processed.append(syl)
    word = "".join(processed)

    # real sound of LatAm Z
    word = re.sub("z", "s", word)

    # secondary stress
    word = re.sub("ˈ(.+)ˈ", r"ˌ\1ˈ", word)
    word = re.sub("ˈ(.+)ˌ", r"ˌ\1ˌ", word)
    word = re.sub("ˌ(.+)ˈ(.+)ˈ", r"ˌ\1ˌ\2ˈ", word)

    # phonetic transcription
    if phonetic:
        # θ, s, f before voiced consonants
        word = re.sub("θ([ˈˌ]?[mnɲbdɟɡlʎɾrh])", r"θ̬\1", word)
        word = re.sub("s([ˈˌ]?[mnɲbdɟɡlʎɾrh])", r"z\1", word)
        word = re.sub("f([ˈˌ]?[mnɲbdɟɡlʎrh])", r"v\1", word)

        # lots of allophones going on
        # First turn every voiced stop into its continuant, then restore the
        # stop word-initially and after the nasals/laterals listed below.
        word = _sub_table(
            "[bdɟɡ]",
            {"b": "β", "d": "ð", "ɟ": "ʝ", "ɡ": "ɣ"},
            word
        )
        word = _sub_table(
            "^[ˈˌ]?[βðɣʝ]",
            {
                "β": "b", "ð": "d", "ʝ": "ɟ", "ɣ": "ɡ",
                "ˈβ": "ˈb", "ˈð": "ˈd", "ˈʝ": "ˈɟ", "ˈɣ": "ˈɡ",
                "ˌβ": "ˌb", "ˌð": "ˌd", "ˌʝ": "ˌɟ", "ˌɣ": "ˌɡ"
            },
            word
        )
        word = re.sub("([mnɲ][ˈˌ]?)β", r"\1b", word)
        word = re.sub("([lʎmnɲ][ˈˌ]?)ð", r"\1d", word)
        word = re.sub("([mnɲ][ˈˌ]?)ɣ", r"\1ɡ", word)
        word = re.sub("([lʎmnɲ][ˈˌ]?)ʝ", r"\1ɟ", word)
        word = _sub_table("[td]", {"t": "t̪", "d": "d̪"}, word)

        # nasal assimilation before consonants
        word = re.sub("n([ˈˌ]?[f])", r"ɱ\1", word)
        word = re.sub("n([ˈˌ]?[td])", r"n̪\1", word)
        word = re.sub("n([ˈˌ]?[θ])", r"n̟\1", word)
        word = re.sub("n([ˈˌ]?ʃ)", r"nʲ\1", word)
        word = re.sub("n([ˈˌ]?[ɟʎ])", r"ɲ\1", word)
        word = re.sub("n([ˈˌ]?[kxɡ])", r"ŋ\1", word)

        # lateral assimilation before consonants
        word = re.sub("l([ˈˌ]?[td])", r"l̪\1", word)
        word = re.sub("l([ˈˌ]?[θ])", r"l̟\1", word)

        # semivowels
        word = re.sub("([aeouãẽõũ][iïĩ])", r"\1̯", word)
        word = re.sub("([aeioãẽĩõ][uũ])", r"\1̯", word)

    word = re.sub("h", "", word)  # silent "h"
    word = re.sub("ʃ", "t͡ʃ", word)  # fake "ch" to real "ch"
    word = re.sub("ɟ", "ɟ͡ʝ", word)  # fake "y" to real "y"
    word = re.sub("ï", "i", word)  # fake "y$" to real "y$"

    return word