Module:mr-translit
Itsura
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Wikang Marathi text. It is also used to transliterate Ahirani, Konkani, at Varhadi.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:mr-translit/testcases.
Functions
[baguhin]tr(text, lang, sc)- Transliterates a given piece of
textwritten in the script specified by the codesc, and language specified by the codelang. - When the transliteration fails, returns
nil.
-- Transliteration for Marathi (in progress)
local export = {}
local m_str_utils = require("Module:string utilities")
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local reverse = m_str_utils.reverse
local toNFC = mw.ustring.toNFC
local conv = {
-- consonants
['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ',
['च'] = 'c', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ',
['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ',
['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n',
['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm',
['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'v', ['ळ'] = 'ḷ',
['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h',
['ऱ'] = 'r',
-- ['ज्ञ'] = 'jñ',
-- not for page titles
['ज़'] = 'j̈', ['झ़'] = 'j̈h', ['च़'] = 'ċ',
-- vowel diacritics
['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o',
['ा'] = 'ā', ['ी'] = 'ī', ['ू'] = 'ū',
['ृ'] = 'ru',
['ै'] = 'ai', ['ौ'] = 'au',
['ॉ'] = 'ŏ',
['ॅ'] = 'ĕ',
-- vowel signs
['अ'] = 'a', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o',
['आ'] = 'ā', ['ई'] = 'ī', ['ऊ'] = 'ū',
['ऋ'] = 'ru',
['ऐ'] = 'ai', ['औ'] = 'au',
['ऑ'] = 'ŏ',
['ॲ'] = 'ĕ', ['ऍ'] = 'ĕ',
['ॐ'] = 'om',
-- chandrabindu
['ँ'] = '̃',
-- anusvara
['ं'] = 'ṁ',
-- visarga
['ः'] = 'ḥ',
-- virama
['्'] = '',
-- numerals
['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
-- punctuation
['।'] = '.', -- danda
['॥'] = '.', -- double danda
['+'] = '', -- compound separator
-- abbreviation sign
['॰'] = '.',
}
local nasal_assim = {
['क'] = 'ङ', ['ख'] = 'ङ', ['ग'] = 'ङ', ['घ'] = 'ङ',
['च़'] = 'न', ['ज़'] = 'न', ['झ़'] = 'न',
['च'] = 'ञ', ['छ'] = 'ञ', ['ज'] = 'ञ', ['झ'] = 'ञ',
['ट'] = 'ण', ['ठ'] = 'ण', ['ड'] = 'ण', ['ढ'] = 'ण',
['प'] = 'म', ['फ'] = 'म', ['ब'] = 'म', ['भ'] = 'म', ['म'] = 'म',
['य'] = 'इ', ['र'] = 'उ', ['ल'] = 'ल', ['व'] = 'उ',
['श'] = 'उ', ['ष'] = 'उ', ['स'] = 'उ', ['ह'] = 'उ',
}
local perm_cl = {
['म्ल'] = true, ['व्ल'] = true, ['न्ल'] = true,
}
local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनमळ', 'दतयरलवहनम'
local vowel, vowel_sign = '%*aिुृेोाीूैौॉॅ', 'अइउएओआईऊऋऐऔऑॲऍ'
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(़?[' .. all_cons .. '])a(़?[' .. all_cons .. '])([ंँ]?[' .. vowel .. vowel_sign .. '])'
function export.tr(text, lang, sc)
text = gsub(text, 'ाँ', 'ॉ' .. 'ं')
text = gsub(text, 'ँ', 'ॅ' .. 'ं')
text = gsub(text, '([^' .. vowel .. vowel_sign .. '])ं ', '%1अ ')
text = gsub(text, '([^' .. vowel .. vowel_sign .. '])ं$', '%1अ')
text = gsub(text, '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', function(c, d)
return c .. (d == "" and 'a' or d) end)
for word in gmatch(text, "[ऀ-ॿa]+") do
local orig_word = word
word = reverse(word)
word = gsub(word, '^a(़?[' .. all_cons .. '][' .. vowel .. vowel_sign .. '])', '%1')
while match(word, syncope_pattern) do
word = gsub(word, syncope_pattern, '%1%2%3%4')
end
word = gsub(word, '(़?.?)ं(.)', function(succ, prev)
return succ .. (succ..prev == "a" and "्म" or
(succ == "" and match(prev, '[' .. vowel .. ']') and "̃" or nasal_assim[succ] or "n")) .. prev end)
text = gsub(text, orig_word, reverse(word))
end
text = gsub(text, '.़?', conv)
text = gsub(text, 'ñjñ', 'ndny')
text = gsub(text, 'jñ', 'dny')
text = gsub(text, 'a([iu])̃', 'a͠%1')
text = gsub(text, 'aa', 'a')
return toNFC(text)
end
return export