-- Module:Cher-common (from Wiktionary)


-- Table of everything this module exposes; it is returned at the end of the file
local export = {}

-- Helper modules (string helpers and table helpers)
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")

-- Short local aliases for the string functions used below
local decomp = mw.ustring.toNFD     -- Unicode NFD decomposition (splits base letters from combining marks)
local gsub = m_str_utils.gsub       -- pattern substitution; presumably Unicode-aware -- confirm in Module:string utilities
local lower = m_str_utils.lower     -- lowercasing; presumably Unicode-aware -- confirm in Module:string utilities

-- Combining accents used as tone marks, built from their code points
local U = require("Module:string/char")
local acute = U(0x0301)  -- U+0301 combining acute accent
local grave = U(0x0300)  -- U+0300 combining grave accent
local double_acute = U(0x030B)  -- U+030B combining double acute accent
local double_grave = U(0x030F)  -- U+030F combining double grave accent

-- Character sets, written as plain strings so they can be spliced into
-- pattern character classes ("[" .. SET .. "]").
-- The sets also contain the single-letter codes produced by simplify_tr:
-- "q", "N", "T", "D" and "ʔ" in CONS, and "A" in VOWL.
export.CONS = "cdghjklmnqstwyCDGHJKLMNQSTWYʔ"  -- consonants
export.VOWL = "aeiouvAEIOUV"  -- vowels
export.TONE = acute .. grave .. double_acute .. double_grave  -- main tone marks
-- Local aliases for use inside this module
local CONS = export.CONS
local VOWL = export.VOWL
local TONE = export.TONE

-- Cherokee vowel order. The exported list gives the vowels in sequence; the
-- local `vowel_order` is the inverted table (presumably vowel -> 1-based
-- position, per Module:table's invert), used to index rows of export.syll_list.
local vowel_sequence = {
	"a",
	"e",
	"i",
	"o",
	"u",
	"v",
}
export.vowel_order = vowel_sequence
local vowel_order = m_table.invert(vowel_sequence)

-- Voiceless consonant -> voiced counterpart.
-- Used by simplify_tr to respell unaspirated c/k/t as j/g/d.
local voiced_pairs = {
	c = "j",
	k = "g",
	t = "d",
}
export.voiced_pairs = voiced_pairs

-- Map Latin consonants to Cherokee syllables
-- Each consonant key (the empty string means "no consonant") maps to a list of
-- syllabary characters, indexed by vowel position in the order a, e, i, o, u, v.
-- Some lists are shorter than six entries because the syllabary has no
-- dedicated character for the remaining vowels; gen_syll falls back to a
-- related consonant's row (k -> g, t -> d, hn -> n, dl -> tl) in that case.
export.syll_list = {
	-- order of syllables: a, e, i, o, u, v
	[""] = {"Ꭰ", "Ꭱ", "Ꭲ", "Ꭳ", "Ꭴ", "Ꭵ"},
	k = {"Ꭷ"},  -- only "ka" has its own character
	g = {"Ꭶ", "Ꭸ", "Ꭹ", "Ꭺ", "Ꭻ", "Ꭼ"},
	h = {"Ꭽ", "Ꭾ", "Ꭿ", "Ꮀ", "Ꮁ", "Ꮂ"},
	l = {"Ꮃ", "Ꮄ", "Ꮅ", "Ꮆ", "Ꮇ", "Ꮈ"},
	m = {"Ꮉ", "Ꮊ", "Ꮋ", "Ꮌ", "Ꮍ", "Ᏽ"}, -- Ᏽ is obsolete
	n = {"Ꮎ", "Ꮑ", "Ꮒ", "Ꮓ", "Ꮔ", "Ꮕ"},
	hn = {"Ꮏ"},  -- only "hna" has its own character
	qu = {"Ꮖ", "Ꮗ", "Ꮘ", "Ꮙ", "Ꮚ", "Ꮛ"},
	s = {"Ꮜ", "Ꮞ", "Ꮟ", "Ꮠ", "Ꮡ", "Ꮢ"},
	d = {"Ꮣ", "Ꮥ", "Ꮧ", "Ꮩ", "Ꮪ", "Ꮫ"},
	t = {"Ꮤ", "Ꮦ", "Ꮨ"},  -- only "ta", "te", "ti" have their own characters
	dl = {"Ꮬ"},  -- only "dla" has its own character
	tl = {"Ꮭ", "Ꮮ", "Ꮯ", "Ꮰ", "Ꮱ", "Ꮲ"},
	ts = {"Ꮳ", "Ꮴ", "Ꮵ", "Ꮶ", "Ꮷ", "Ꮸ"},
	w = {"Ꮹ", "Ꮺ", "Ꮻ", "Ꮼ", "Ꮽ", "Ꮾ"},
	y = {"Ꮿ", "Ᏸ", "Ᏹ", "Ᏺ", "Ᏻ", "Ᏼ"}
}

-- Ordered consonant normalisations applied by simplify_tr.
-- Each entry is {pattern, replacement}. Order matters: later rules operate on
-- the output of earlier ones (e.g. "tl" is first respelled "dl" by the voicing
-- rule and then coded as "D", while "thl" becomes "tl" and then "T").
local simplify_replacements = {
	{ "kh?w", "q" },           -- kw/khw -> q
	{ "nah", "nA" },           -- nah -> nA ("A" selects the dedicated "nah" character in gen_syll)
	{ "hn", "N" },             -- hn -> N
	{ "([ckt])([^h])", function(ch, ch_next) -- voiceless unaspirated to voiced
		return voiced_pairs[ch] .. ch_next
	end },
	{ "([ckt])h", "%1" },      -- voiceless aspirated to unaspirated
	{ "[ht]l", "T" },          -- hl, or tl left over from thl -> T
	{ "dl", "D" }              -- dl -> D
}

-- Simplifies a Latin transcription so that it can be split into
-- consonant-vowel pairs: tone marks and vowel length are removed, and
-- consonant spellings are normalised to single-character codes.
--
-- text: the transcription (may contain several whitespace-separated words).
-- Returns the simplified string (a single value).
local function simplify_tr(text)
	-- make lowercase and decompose chars, so that tone marks become separate
	-- combining characters that can be stripped below
	local tr = decomp(lower(text))

	-- add word boundaries ("#" on both sides of every run of non-space characters)
	tr = gsub(tr, "([^%s]+)", "#%1#")

	-- remove tone marks
	tr = gsub(tr, "[" .. TONE .. "]", "")

	-- remove long vowels (a doubled vowel collapses to a single one)
	tr = gsub(tr, "([" .. VOWL .. "])%1", "%1")

	-- remove dummy markers: a "*" after a vowel marks it as a dummy vowel.
	-- The marker itself is dropped and the vowel letter is kept.
	tr = gsub(tr, "([kt])h([" .. VOWL .. "])%*", "%1%2")  -- special case: kh/th before a dummy vowel loses its "h"
	tr = gsub(tr, "%*", "")

	-- normalise consonants, one rule at a time in the listed order
	for _, replacement in ipairs(simplify_replacements) do
		tr = gsub(tr, replacement[1], replacement[2])
	end

	-- handle vowels: every vowel needs a consonant in front of it, so a glottal
	-- stop is inserted before word-initial vowels and between adjacent vowels
	tr = gsub(tr, "#([" .. VOWL .. "])", "#ʔ%1")  -- initial vowels

	-- vowel sequences: each match consumes both vowels of a pair, so in a run of
	-- three or more vowels a single pass leaves every other boundary untouched
	-- ("aei" -> "aʔei"). A second pass fills in the remaining boundaries.
	local vowel_pair = "([" .. VOWL .. "])([" .. VOWL .. "])"
	tr = gsub(tr, vowel_pair, "%1ʔ%2")
	tr = gsub(tr, vowel_pair, "%1ʔ%2")

	-- remove word boundaries; the parentheses drop gsub's extra return value
	return (gsub(tr, "#", ""))
end

-- Looks up the Cherokee syllabary character for one consonant-vowel pair.
--
-- cons:  a consonant letter, or one of the single-letter codes produced by
--        simplify_tr (e.g. "N" for hn, "T" for tl, "ʔ" for "no consonant").
-- vowel: a vowel letter, "A" (the special "nah" syllable), or "" for a bare
--        consonant.
-- Returns the syllable as a string ("" when a bare h or glottal stop is simply
-- dropped), or nil when no syllable exists for the pair.
local function gen_syll(cons, vowel)
	-- expand single-letter codes to the keys used in export.syll_list
	local expansions = {
		c = "ts",
		D = "dl",
		j = "ts",
		N = "hn",
		q = "qu",
		T = "tl",
		["ʔ"] = ""
	}
	local onset = expansions[cons] or cons

	-- special cases that bypass the table lookup
	if vowel == "" then
		if onset == "s" then
			-- bare "s" has its own character
			return "Ꮝ"
		elseif onset == "h" or onset == "" then
			-- bare "h" / glottal stop is not written
			return ""
		end
	elseif vowel == "A" then
		-- "nah" has its own character
		return "Ꮐ"
	end

	-- position of the vowel within a row of export.syll_list (nil if unknown)
	local column = vowel_order[vowel]

	-- fetches the character for `key` at the current vowel position, if any
	local function lookup(key)
		local row = export.syll_list[key]
		return row and column and column <= #row and row[column]
	end

	local syll = lookup(onset)
	if not syll then
		-- rows that lack this vowel borrow from a related consonant
		local substitutes = {
			dl = "tl",
			hn = "n",
			k = "g",
			t = "d"
		}
		local substitute = substitutes[onset]
		if substitute then
			syll = lookup(substitute)
		end
	end

	-- nil signals an invalid syllable
	return syll or nil
end

-- Converts a Latin transcription into Cherokee syllabary.
-- The transcription is first simplified, then every consonant-vowel pair is
-- replaced by its syllable. A consonant with no following vowel that cannot be
-- written on its own is collected, and if any were found an error listing
-- them (in order of appearance) is raised.
function export.to_syll(tr)
	local simplified = simplify_tr(tr)
	local unmatched = {}

	-- replacement callback for one consonant plus optional vowel
	local function convert(c, v)
		local syll = gen_syll(c, v)
		if syll then
			return syll
		end
		if v ~= "" then
			-- no syllable for this pair: leave the letters as they are
			return c .. v
		end
		-- bare consonant without a syllable: remember it for the error below
		unmatched[#unmatched + 1] = c
		return c
	end

	local pair_pattern = "([" .. CONS .. "])([" .. VOWL .. "]?)"
	local converted = gsub(simplified, pair_pattern, convert)

	-- report every invalid consonant at once
	if #unmatched ~= 0 then
		local listed = table.concat(unmatched, ", ")
		error("Invalid consonant(s) (in order: " .. listed ..
			') - please fix by adding a dummy vowel after each one, followed by a "*".')
	end

	return converted
end

return export