Pumunta sa nilalaman

Module:Tibt-translit

Mula Wiksiyonaryo

This module will transliterate text in the Katitikang Tibetan. It is used to transliterate Nyenkha, Manangba, Olekha, Old Tibetan, Brokpake, Sikkimese, Chali, Tshangla, Tawang Monpa, Classical Tibetan, Khengkha, Kurtöp, Sherpa, Zhang-Zhung, at Zangskari. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Tibt-translit/testcases.

Functions

[baguhin]
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local upper = m_str_utils.upper

local Tibt = require("Module:Tibt-common")

local export = {}

local twoChars = {
	["རྀ"] = "ṛ", ["ྲྀ"] = "ṛ", -- Primarily used in Sanskrit(-derived) borrowings.
	["ལྀ"] = "ḷ", ["ླྀ"] = "ḷ",
	
	["ཕ༹"] = "f", ["བ༹"] = "v", -- Used to transliterate Chinese.
	["ཁ༹"] = "x", ["ག༹"] = "ġ", -- Used in Balti.
	
	["ྥ༹"] = "f", ["ྦ༹"] = "v",
	["ྑ༹"] = "x", ["ྒ༹"] = "ġ",
}

local oneChar = {
	["ཀ"] = "k", ["ཁ"] = "kh", ["ག"] = "g", ["ང"] = "ng",
	["ཅ"] = "c", ["ཆ"] = "ch", ["ཇ"] = "j", ["ཉ"] = "ny",
	["ཏ"] = "t", ["ཐ"] = "th", ["ད"] = "d", ["ན"] = "n",
	["པ"] = "p", ["ཕ"] = "ph", ["བ"] = "b", ["མ"] = "m",
	["ཙ"] = "ts", ["ཚ"] = "tsh", ["ཛ"] = "dz", ["ཝ"] = ".w",
	["ཞ"] = "zh", ["ཟ"] = "z", ["འ"] = "'", ["ཡ"] = ".y",
	["ར"] = ".r", ["ཪ"] = ".r", ["ལ"] = "l", ["ཤ"] = "sh", ["ས"] = "s",
	["ཧ"] = "h", ["ཨ"] = "\1",
	["ཊ"] = "ṭ", ["ཋ"] = "ṭh", ["ཌ"] = "ḍ", ["ཎ"] = "ṇ", ["ཥ"] = "ṣ",
	["ཫ"] = "q", ["ཬ"] = "ṛ", -- Used in Balti.
	["྅"] = "ʼ", ["ྈ"] = "x", ["ྉ"] = "f", ["ྌ"] = "f", -- Used in Sanskrit.
	
	["ཱ"] = "̄", ["ི"] = "i", ["ྀ"] = "ị", ["ུ"] = "u", ["ེ"] = "e", ["ཻ"] = "ai", ["ོ"] = "o", ["ཽ"] = "au",
	
	["ཾ"] = "ṃ", ["ྂ"] = "ṃ", ["ྃ"] = "m̐", ["ཿ"] = "ḥ",
	
	["ྐ"] = "k", ["ྑ"] = "kh", ["ྒ"] = "g", ["ྔ"] = "ng",
	["ྕ"] = "c", ["ྖ"] = "ch", ["ྗ"] = "j", ["ྙ"] = "ny",
	["ྟ"] = "t", ["ྠ"] = "th", ["ྡ"] = "d", ["ྣ"] = "n",
	["ྤ"] = "p", ["ྥ"] = "ph", ["ྦ"] = "b", ["ྨ"] = "m",
	["ྩ"] = "ts", ["ྪ"] = "tsh", ["ྫ"] = "dz", ["ྭ"] = "w", ["ྺ"] = "w",
	["ྮ"] = "zh", ["ྯ"] = "z", ["ྰ"] = "'", ["ྱ"] = "y", ["ྻ"] = "y",
	["ྲ"] = "r", ["ྼ"] = "r", ["ླ"] = "l", ["ྴ"] = "sh", ["ྶ"] = "s",
	["ྷ"] = "h", ["ྸ"] = "+a",
	["ྚ"] = "ṭ", ["ྛ"] = "ṭh", ["ྜ"] = "ḍ", ["ྞ"] = "ṇ", ["ྵ"] = "ṣ",
	["ྍ"] = "x", ["ྎ"] = "f", ["ྏ"] = "f",
}

local symbol = {
	["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4",
	["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9",
	["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5",
	["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5",
	["་"] = " ", ["༌"] = "*", ["།"] = ".", ["༎"] = ".\n\n", ["༏"] = ";",
	["༑"] = "|", ["༈"] = "!", ["༔"] = ":", ["༼"] = "(", ["༽"] = ")",
	["༺"] = "<", ["༻"] = ">",
	[" "] = "\1"
}

function export.tr(text, lang, sc)

	local langObj; if not lang then
		error("Language code required.")
	else
		langObj = require("Module:languages").getByCode(lang)
	end
	local scObj = require("Module:scripts").getByCode("Tibt")
	text = (langObj:makeEntryName(text))
	text = scObj:fixDiscouragedSequences(text)
	text = scObj:toFixedNFD(text)
	text = gsub(text, "༒", "།")
	text = gsub(text, "[་༌]+$", "")
	
	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			local tr = syllable
			
			tr = gsub(tr, "(ཱ)([ིེུ-ཽྀྲླ]+)", "%2%1")
			
			local mainStack = gsub(Tibt.findMainStack(syllable, lang), "[ཾཿྂྃ]", "")
			if match(mainStack, "([^ༀི-ཽྀ]ཱ?)$") then
				local newMainStack = mainStack .. "a"
				newMainStack = gsub(newMainStack, "ཱa$", "aཱ")
				tr = gsub(tr, mainStack, newMainStack, 1)
			end
			
			tr = gsub(tr, "^(.*)༷(.*)$", "<u>%1%2</u>")
			tr = gsub(tr, "^(.*)༵(.*)$", "<span style=\"text-decoration-style:double;\">%1%2</span>")
			
			for letter, replacement in pairs(twoChars) do
				tr = gsub(tr, letter, replacement)
			end
			tr = gsub(tr, ".", oneChar)
			
			tr = gsub(tr, "(.')([^aāeiīoḷḹṛṝuū%-<])", "%1a%2")
			tr = gsub(tr, "%f[^%zaāeiīoḷḹṛṝuū%->]%.", "")
			tr = gsub(tr, "%.([rwy][^aāeiīoḷḹṛṝuū])", "%1")
			tr = gsub(tr, "^\1", "")
			tr = tr:gsub("\1", "%.")
			
			text = gsub(text, syllable, tr, 1)
		end
	end
	
	text = gsub(text, "྄a?", "")
	text = gsub(text, ".", symbol)
	text = gsub(text, " ' ", "")
	text = gsub(text, " *· *·? *", " · ")
	text = gsub(text, " *%.", ".")
	text = gsub(text, "\n+", "\n\n")
	text = gsub(text, "\n\n$", "")
	if match(text, "%. ") or match(text, "%.\n.") or match(text, "%.$") then
		text = gsub(text, "^'?.", upper)
		text = gsub(text, "\n\n'?.", upper)
		text = gsub(text, "%. '?.", upper)
	end
	
	-- East Bodish-specific fixes
	local east_bodish = { ["xkz"] = true, ["kjz"] = true, ["xkf"] = true, ["dzl"] = true, ["dka"] = true }
	
	if east_bodish[lang] then
		if lang == "xkz" then
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			text = gsub(text, "̄", "̂")
			text = gsub(text, "̂́", "́̂")
			text = gsub(text, "([bcdfghjklmnprstvwxyz]+[aeiou])́", "'%1")
			text = gsub(text, "([aeiouâêîôû]n?[bcdfghjklmnprstvwxyz])a", "%1")
		elseif lang == "dzl" then
			text = gsub(text, "g%.y", "'y")
			text = gsub(text, "sn", "'n")
			text = gsub(text, "dbr", "'r")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "'w%f[%A]", "'wa")
		elseif lang == "kjz" then
			text = gsub(text, "r%f[nm]", "'")
			text = gsub(text, "db%f[yr]", "'")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "ml", "'l")
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			text = gsub(text, "([aeiou])'", "%1h")
			text = gsub(text, "([aou])l%f[%A]", "%1̈")
		end
		
		text = gsub(text, "[bdg]%f[%A]", {b="p", d="t", g="k"})
		text = gsub(text, "nk%f[%A]", "ng")
		text = gsub(text, "%s", "")
	end
	
	text = gsub(text, "\1", " ") -- substitute normal space between words back
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return toNFC(text) .. " "
end

return export