Module:Cher-common
Itsura
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}

-- External helper modules
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")

-- Short local names for the string functions used in this module
local decomp = mw.ustring.toNFD
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower

-- Combining tone diacritics, built from their code points
local U = require("Module:string/char")
local acute, grave = U(0x0301), U(0x0300) -- acute accent, grave accent
local double_acute, double_grave = U(0x030B), U(0x030F) -- double acute accent, double grave accent

-- Character sets, written so they can be dropped inside a "[...]" pattern class
-- Both letter cases are listed; simplify_tr lowercases its input and then uses
-- a few uppercase letters (A, N, T, D) as internal single-character symbols
local CONS = "cdghjklmnqstwyCDGHJKLMNQSTWYʔ" -- consonants
local VOWL = "aeiouvAEIOUV" -- vowels
local TONE = acute .. grave .. double_acute .. double_grave -- main tone marks
export.CONS, export.VOWL, export.TONE = CONS, VOWL, TONE

-- Vowel order in Cherokee; the exported value is the ordered list, while the
-- local is its inverse, which gen_syll indexes by vowel to get a position in a
-- syll_list row
export.vowel_order = {"a", "e", "i", "o", "u", "v"}
local vowel_order = m_table.invert(export.vowel_order)

-- Voiceless consonant -> voiced counterpart
local voiced_pairs = {
	c = "j",
	k = "g",
	t = "d"
}
export.voiced_pairs = voiced_pairs
-- Map Latin consonants to Cherokee syllables
-- Each key is a consonant spelling and each value lists that consonant's
-- syllables in vowel order, so a syllable is found by consonant first and then
-- by vowel position. The "" row is the one gen_syll uses for the "ʔ" symbol.
-- Some rows are shorter than six entries; for the vowels they do not list,
-- gen_syll falls back to another row (k -> g, t -> d, hn -> n, dl -> tl).
export.syll_list = {
-- order of syllables: a, e, i, o, u, v
[""] = {"Ꭰ", "Ꭱ", "Ꭲ", "Ꭳ", "Ꭴ", "Ꭵ"},
k = {"Ꭷ"}, -- "a" only; other vowels come from the g row
g = {"Ꭶ", "Ꭸ", "Ꭹ", "Ꭺ", "Ꭻ", "Ꭼ"},
h = {"Ꭽ", "Ꭾ", "Ꭿ", "Ꮀ", "Ꮁ", "Ꮂ"},
l = {"Ꮃ", "Ꮄ", "Ꮅ", "Ꮆ", "Ꮇ", "Ꮈ"},
m = {"Ꮉ", "Ꮊ", "Ꮋ", "Ꮌ", "Ꮍ", "Ᏽ"}, -- Ᏽ is obsolete
n = {"Ꮎ", "Ꮑ", "Ꮒ", "Ꮓ", "Ꮔ", "Ꮕ"},
hn = {"Ꮏ"}, -- "a" only; other vowels come from the n row
qu = {"Ꮖ", "Ꮗ", "Ꮘ", "Ꮙ", "Ꮚ", "Ꮛ"},
s = {"Ꮜ", "Ꮞ", "Ꮟ", "Ꮠ", "Ꮡ", "Ꮢ"},
d = {"Ꮣ", "Ꮥ", "Ꮧ", "Ꮩ", "Ꮪ", "Ꮫ"},
t = {"Ꮤ", "Ꮦ", "Ꮨ"}, -- "a", "e", "i" only; other vowels come from the d row
dl = {"Ꮬ"}, -- "a" only; other vowels come from the tl row
tl = {"Ꮭ", "Ꮮ", "Ꮯ", "Ꮰ", "Ꮱ", "Ꮲ"},
ts = {"Ꮳ", "Ꮴ", "Ꮵ", "Ꮶ", "Ꮷ", "Ꮸ"},
w = {"Ꮹ", "Ꮺ", "Ꮻ", "Ꮼ", "Ꮽ", "Ꮾ"},
y = {"Ꮿ", "Ᏸ", "Ᏹ", "Ᏺ", "Ᏻ", "Ᏼ"}
}
-- Simplifies the transcription by removing tone and length in vowels and
-- normalising the spelling of consonants, so that the result is a plain
-- sequence of single-character consonant symbols and vowels that
-- export.to_syll can match pair by pair
-- @param text  Latin transcription string
-- @return the simplified transcription
local function simplify_tr(text)
	-- make lowercase and decompose chars, so tone marks become separate
	-- combining characters that can be stripped on their own
	local tr = decomp(lower(text))
	-- add word boundaries: wrap every whitespace-delimited word in "#"
	tr = gsub(tr, "([^%s]+)", "#%1#")
	-- remove tone marks
	tr = gsub(tr, "[" .. TONE .. "]", "")
	-- remove long vowels (a doubled vowel becomes a single one)
	tr = gsub(tr, "([" .. VOWL .. "])%1", "%1")
	-- remove dummy markers
	tr = gsub(tr, "([kt])h([" .. VOWL .. "])%*", "%1%2") -- special case for aspiration in preceding consonant
	tr = gsub(tr, "%*", "")
	-- normalise consonants; the rules are applied top to bottom and later
	-- rules see the output of earlier ones, so their order matters
	local replacements = {
		{ "kh?w", "q" }, -- kw/khw -> q
		{ "nah", "nA" }, -- nah -> nA ("A" is the vowel symbol gen_syll turns into Ꮐ)
		{ "hn", "N" }, -- hn -> N
		{ "([ckt])([^h])", function(ch, ch_next) -- voiceless unaspirated to voiced
			return voiced_pairs[ch] .. ch_next
		end },
		{ "([ckt])h", "%1" }, -- voiceless aspirated to unaspirated
		{ "[ht]l", "T" }, -- hl/tl -> T
		{ "dl", "D" } -- dl -> D
	}
	-- go over each replacement
	for _, replacement in ipairs(replacements) do
		tr = gsub(tr, replacement[1], replacement[2])
	end
	-- handle vowels
	tr = gsub(tr, "#([" .. VOWL .. "])", "#ʔ%1") -- initial vowels
	-- vowel sequences: gsub matches never overlap, so a single pass over "aeo"
	-- splits "ae" but leaves "eo" adjacent, and that bare vowel would then be
	-- left unconverted by to_syll. A second pass separates the pairs the first
	-- pass skipped, which covers runs of any length.
	local vowel_pair = "([" .. VOWL .. "])([" .. VOWL .. "])"
	for _ = 1, 2 do
		tr = gsub(tr, vowel_pair, "%1ʔ%2")
	end
	-- remove word boundaries
	return gsub(tr, "#", "")
end
-- Consonant symbols as they arrive from the simplified transcription, mapped
-- to the keys used in export.syll_list
local SYLL_KEYS = {
	["c"] = "ts",
	["D"] = "dl",
	["j"] = "ts",
	["N"] = "hn",
	["q"] = "qu",
	["T"] = "tl",
	["ʔ"] = ""
}

-- Row to try next when a consonant's own row has no entry for the vowel
local FALLBACK_KEYS = {
	["dl"] = "tl",
	["hn"] = "n",
	["k"] = "g",
	["t"] = "d"
}

-- Returns the entry at position idx of a syll_list row, or nil when the row
-- is missing or shorter than idx
local function pick_syll(row, idx)
	if row and idx <= #row then
		return row[idx]
	end
	return nil
end

-- Produces one Cherokee syllable for a consonant symbol and a vowel
-- @param cons   consonant symbol (mapped through SYLL_KEYS before lookup)
-- @param vowel  vowel letter, "" for a bare consonant, or "A"
-- @return the syllable string ("" when there is nothing to write), or nil if
--         the combination has no syllable
local function gen_syll(cons, vowel)
	local key = SYLL_KEYS[cons] or cons

	-- combinations that are not stored in syll_list
	if vowel == "" then
		if key == "s" then
			return "Ꮝ"
		elseif key == "h" or key == "" then
			return ""
		end
	elseif vowel == "A" then
		return "Ꮐ"
	end

	-- without a known vowel position no row can be consulted
	local idx = vowel_order[vowel]
	if not idx then
		return nil
	end

	-- own row first, then the fallback row if there is one
	local syll = pick_syll(export.syll_list[key], idx)
	if not syll then
		local alt_key = FALLBACK_KEYS[key]
		if alt_key then
			syll = pick_syll(export.syll_list[alt_key], idx)
		end
	end
	return syll
end
-- Converts a Latin transcription into Cherokee syllabary
-- The transcription is simplified first, then every consonant (with its
-- optional following vowel) is replaced by the matching syllable
-- Consonants that have no vowel and no syllable of their own are collected,
-- and if there are any an error listing them is raised
-- @param tr  Latin transcription string
-- @return the syllabary string
function export.to_syll(tr)
	local simplified = simplify_tr(tr)
	local pair_pattern = "([" .. CONS .. "])([" .. VOWL .. "]?)"
	local bad_cons = {}

	-- replacement callback for one consonant-vowel pair
	local function convert(c, v)
		local syll = gen_syll(c, v)
		if syll then
			return syll
		end
		if v == "" then
			-- bare consonant with no syllable: remember it for the error below
			bad_cons[#bad_cons + 1] = c
			return c
		end
		-- no syllable for this pair: leave the Latin letters in place
		return c .. v
	end

	local result = gsub(simplified, pair_pattern, convert)

	-- raise error for any invalid consonant
	if #bad_cons > 0 then
		error("Invalid consonant(s) (in order: " .. table.concat(bad_cons, ", ") ..
			') - please fix by adding a dummy vowel after each one, followed by a "*".')
	end
	return result
end
return export