Module:Tibt-translit
Itsura
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module transliterates text written in the Tibetan script (Katitikang Tibetan). It is used to transliterate Nyenkha, Manangba, Olekha, Old Tibetan, Brokpake, Sikkimese, Chali, Tshangla, Tawang Monpa, Classical Tibetan, Khengkha, Kurtöp, Sherpa, Zhang-Zhung, and Zangskari.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:Tibt-translit/testcases.
Functions
[baguhin]
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local upper = m_str_utils.upper
local Tibt = require("Module:Tibt-common")
local export = {}
-- Two-character sequences that export.tr replaces as a unit, before the
-- per-character lookup in oneChar is applied.
local twoChars = {
	-- Primarily used in Sanskrit(-derived) borrowings.
	["རྀ"] = "ṛ",
	["ྲྀ"] = "ṛ",
	["ལྀ"] = "ḷ",
	["ླྀ"] = "ḷ",
	-- Used to transliterate Chinese.
	["ཕ༹"] = "f",
	["བ༹"] = "v",
	-- Used in Balti.
	["ཁ༹"] = "x",
	["ག༹"] = "ġ",
	-- Further sequences yielding the same four values as the two groups above.
	["ྥ༹"] = "f",
	["ྦ༹"] = "v",
	["ྑ༹"] = "x",
	["ྒ༹"] = "ġ",
}
-- Per-character transliteration table, applied by export.tr with gsub(tr, ".", oneChar)
-- after the twoChars sequences have been replaced.
-- Values beginning with "." (.w, .y, .r) carry a marker that export.tr strips or keeps
-- depending on the surrounding letters; "\1" is a placeholder that export.tr removes at
-- the start of a syllable and otherwise turns into ".".
local oneChar = {
-- Base consonant letters.
["ཀ"] = "k", ["ཁ"] = "kh", ["ག"] = "g", ["ང"] = "ng",
["ཅ"] = "c", ["ཆ"] = "ch", ["ཇ"] = "j", ["ཉ"] = "ny",
["ཏ"] = "t", ["ཐ"] = "th", ["ད"] = "d", ["ན"] = "n",
["པ"] = "p", ["ཕ"] = "ph", ["བ"] = "b", ["མ"] = "m",
["ཙ"] = "ts", ["ཚ"] = "tsh", ["ཛ"] = "dz", ["ཝ"] = ".w",
["ཞ"] = "zh", ["ཟ"] = "z", ["འ"] = "'", ["ཡ"] = ".y",
["ར"] = ".r", ["ཪ"] = ".r", ["ལ"] = "l", ["ཤ"] = "sh", ["ས"] = "s",
["ཧ"] = "h", ["ཨ"] = "\1",
-- Retroflex-series letters (transliterated with an underdot).
["ཊ"] = "ṭ", ["ཋ"] = "ṭh", ["ཌ"] = "ḍ", ["ཎ"] = "ṇ", ["ཥ"] = "ṣ",
["ཫ"] = "q", ["ཬ"] = "ṛ", -- Used in Balti.
["྅"] = "ʼ", ["ྈ"] = "x", ["ྉ"] = "f", ["ྌ"] = "f", -- Used in Sanskrit.
-- Vowel signs; the first entry maps to a combining macron (vowel length).
["ཱ"] = "̄", ["ི"] = "i", ["ྀ"] = "ị", ["ུ"] = "u", ["ེ"] = "e", ["ཻ"] = "ai", ["ོ"] = "o", ["ཽ"] = "au",
-- Signs transliterated as ṃ / m̐ / ḥ.
["ཾ"] = "ṃ", ["ྂ"] = "ṃ", ["ྃ"] = "m̐", ["ཿ"] = "ḥ",
-- NOTE(review): the keys below appear to be the subjoined (stacked) counterparts of the
-- letters above; they map to the same values but without the "." marker on w/y/r.
["ྐ"] = "k", ["ྑ"] = "kh", ["ྒ"] = "g", ["ྔ"] = "ng",
["ྕ"] = "c", ["ྖ"] = "ch", ["ྗ"] = "j", ["ྙ"] = "ny",
["ྟ"] = "t", ["ྠ"] = "th", ["ྡ"] = "d", ["ྣ"] = "n",
["ྤ"] = "p", ["ྥ"] = "ph", ["ྦ"] = "b", ["ྨ"] = "m",
["ྩ"] = "ts", ["ྪ"] = "tsh", ["ྫ"] = "dz", ["ྭ"] = "w", ["ྺ"] = "w",
["ྮ"] = "zh", ["ྯ"] = "z", ["ྰ"] = "'", ["ྱ"] = "y", ["ྻ"] = "y",
["ྲ"] = "r", ["ྼ"] = "r", ["ླ"] = "l", ["ྴ"] = "sh", ["ྶ"] = "s",
["ྷ"] = "h", ["ྸ"] = "+a",
["ྚ"] = "ṭ", ["ྛ"] = "ṭh", ["ྜ"] = "ḍ", ["ྞ"] = "ṇ", ["ྵ"] = "ṣ",
["ྍ"] = "x", ["ྎ"] = "f", ["ྏ"] = "f",
}
-- Digits, punctuation and spacing. export.tr applies this table to the whole
-- text (gsub(text, ".", symbol)) once every syllable has been transliterated.
local symbol = {
	-- Digits.
	["༠"] = "0",
	["༡"] = "1",
	["༢"] = "2",
	["༣"] = "3",
	["༤"] = "4",
	["༥"] = "5",
	["༦"] = "6",
	["༧"] = "7",
	["༨"] = "8",
	["༩"] = "9",
	-- Half digits.
	["༪"] = "0.5",
	["༫"] = "1.5",
	["༬"] = "2.5",
	["༭"] = "3.5",
	["༮"] = "4.5",
	["༯"] = "5.5",
	["༰"] = "6.5",
	["༱"] = "7.5",
	["༲"] = "8.5",
	["༳"] = "9.5",
	-- Syllable and clause punctuation.
	["་"] = " ",
	["༌"] = "*",
	["།"] = ".",
	["༎"] = ".\n\n",
	["༏"] = ";",
	["༑"] = "|",
	["༈"] = "!",
	["༔"] = ":",
	-- Brackets.
	["༼"] = "(",
	["༽"] = ")",
	["༺"] = "<",
	["༻"] = ">",
	-- An ordinary space becomes the placeholder "\1"; export.tr turns it back
	-- into a space as its final substitution.
	[" "] = "\1",
}
-- Escapes every Lua pattern magic character in `s` so that it matches literally.
local function escapePattern(s)
	return (s:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Escapes "%" in `s` so that it is inserted literally when used as a gsub replacement.
local function escapeReplacement(s)
	return (s:gsub("%%", "%%%%"))
end

--- Transliterates Tibetan-script text into Latin script.
-- @param text  the string to transliterate
-- @param lang  language code (required); also selects the East Bodish
--              post-processing for "xkz", "kjz", "xkf", "dzl" and "dka"
-- @param sc    script code; accepted for interface compatibility but not read
--              (the "Tibt" script object is always used)
-- @return the transliteration, NFC-normalised, followed by one trailing space
-- Raises an error when `lang` is missing or does not resolve to a language object.
function export.tr(text, lang, sc)
	if not lang then
		error("Language code required.")
	end
	local langObj = require("Module:languages").getByCode(lang)
	if not langObj then
		-- NOTE(review): assumes getByCode yields nil for an unknown code; without this
		-- check the failure would surface as an opaque "attempt to index a nil value".
		error("Language code \"" .. tostring(lang) .. "\" is not valid.")
	end
	local scObj = require("Module:scripts").getByCode("Tibt")

	-- Normalise the input through the language and script objects before parsing.
	text = (langObj:makeEntryName(text))
	text = scObj:fixDiscouragedSequences(text)
	text = scObj:toFixedNFD(text)

	text = gsub(text, "༒", "།")
	-- Drop trailing syllable separators.
	text = gsub(text, "[་༌]+$", "")

	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			local tr = syllable
			-- Move the length mark after any vowel signs / subjoined r, l that follow it.
			tr = gsub(tr, "(ཱ)([ིེུ-ཽྀྲླ]+)", "%2%1")
			local mainStack = gsub(Tibt.findMainStack(syllable, lang), "[ཾཿྂྃ]", "")
			-- A main stack that does not end in a vowel sign receives an explicit "a".
			if match(mainStack, "([^ༀི-ཽྀ]ཱ?)$") then
				local newMainStack = mainStack .. "a"
				newMainStack = gsub(newMainStack, "ཱa$", "aཱ")
				-- mainStack is data, not a pattern: match and insert it literally.
				tr = gsub(tr, escapePattern(mainStack), escapeReplacement(newMainStack), 1)
			end
			-- Emphasis marks become HTML wrappers around the whole syllable.
			tr = gsub(tr, "^(.*)༷(.*)$", "<u>%1%2</u>")
			tr = gsub(tr, "^(.*)༵(.*)$", "<span style=\"text-decoration-style:double;\">%1%2</span>")
			-- Two-character sequences first, then the per-character table.
			for letter, replacement in pairs(twoChars) do
				tr = gsub(tr, letter, replacement)
			end
			tr = gsub(tr, ".", oneChar)
			-- Insert "a" after <char>' when no vowel (or "-", "<") follows.
			tr = gsub(tr, "(.')([^aāeiīoḷḹṛṝuū%-<])", "%1a%2")
			-- Remove the "." marker at the start of the string or after a vowel, "-" or ">".
			tr = gsub(tr, "%f[^%zaāeiīoḷḹṛṝuū%->]%.", "")
			-- Remove the "." marker before r/w/y when a non-vowel follows.
			tr = gsub(tr, "%.([rwy][^aāeiīoḷḹṛṝuū])", "%1")
			-- "\1" placeholder: dropped syllable-initially, otherwise written as ".".
			tr = gsub(tr, "^\1", "")
			-- The replacement is a plain "."; "%." is an invalid escape in a replacement
			-- string and is rejected by Lua 5.2+.
			tr = (tr:gsub("\1", "."))
			-- Substitute the transliteration for the first remaining literal occurrence.
			text = gsub(text, escapePattern(syllable), escapeReplacement(tr), 1)
		end
	end

	-- Remove the mark ྄ together with an "a" that directly follows it.
	text = gsub(text, "྄a?", "")
	-- Digits, punctuation and spaces (ordinary spaces become the "\1" placeholder).
	text = gsub(text, ".", symbol)
	text = gsub(text, " ' ", "")
	text = gsub(text, " *· *·? *", " · ")
	text = gsub(text, " *%.", ".")
	text = gsub(text, "\n+", "\n\n")
	text = gsub(text, "\n\n$", "")

	-- If the text contains sentence-final periods, capitalise each sentence start.
	if match(text, "%. ") or match(text, "%.\n.") or match(text, "%.$") then
		text = gsub(text, "^'?.", upper)
		text = gsub(text, "\n\n'?.", upper)
		text = gsub(text, "%. '?.", upper)
	end

	-- East Bodish-specific fixes
	local east_bodish = { ["xkz"] = true, ["kjz"] = true, ["xkf"] = true, ["dzl"] = true, ["dka"] = true }
	if east_bodish[lang] then
		if lang == "xkz" then
			-- "q" is a temporary marker for a vowel-initial position; it ends up as "'".
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			text = gsub(text, "̄", "̂")
			text = gsub(text, "̂́", "́̂")
			text = gsub(text, "([bcdfghjklmnprstvwxyz]+[aeiou])́", "'%1")
			text = gsub(text, "([aeiouâêîôû]n?[bcdfghjklmnprstvwxyz])a", "%1")
		elseif lang == "dzl" then
			text = gsub(text, "g%.y", "'y")
			text = gsub(text, "sn", "'n")
			text = gsub(text, "dbr", "'r")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "'w%f[%A]", "'wa")
		elseif lang == "kjz" then
			text = gsub(text, "r%f[nm]", "'")
			text = gsub(text, "db%f[yr]", "'")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "ml", "'l")
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			text = gsub(text, "([aeiou])'", "%1h")
			text = gsub(text, "([aou])l%f[%A]", "%1̈")
		end
		-- Shared by all five codes: b/d/g before a non-letter become p/t/k, "nk" before
		-- a non-letter becomes "ng", and whitespace is removed.
		text = gsub(text, "[bdg]%f[%A]", {b="p", d="t", g="k"})
		text = gsub(text, "nk%f[%A]", "ng")
		text = gsub(text, "%s", "")
	end

	text = gsub(text, "\1", " ") -- substitute normal space between words back
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return toNFC(text) .. " "
end
-- Module result: the table holding the public function export.tr.
return export
Kategorya:
- Katitikang Tibetan
- Transliteration modules without a testcases subpage
- Transliteration modules by script
- Katitikang Tibetan na modyul
- Transliteration na modyul
- Sikkimese na modyul
- Nyenkha na modyul
- Chali na modyul
- Zhang-Zhung na modyul
- Old Tibetan na modyul
- Brokpake na modyul
- Manangba na modyul
- Tshangla na modyul
- Tawang Monpa na modyul
- Zangskari na modyul
- Sherpa na modyul
- Classical Tibetan na modyul
- Kurtöp na modyul
- Olekha na modyul
- Khengkha na modyul