Module:ar-stripdiacritics
Itsura
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gsub = m_str_utils.gsub
local U = m_str_utils.char
-- Tatweel (kashida), U+0640.
local taTwiil = U(0x640)
-- Alif wasla, U+0671.
local waSla = U(0x671)
-- Diacritics ordinarily removed by entry_name replacements: fathatan,
-- dammatan, kasratan, fatha, damma, kasra, shadda, sukun, subscript alef and
-- superscript (dagger) alef.
-- This list has to cover every diacritic that the strip pattern in
-- `replacements` removes. U+0656 (subscript alef) is stripped there, so it
-- is listed here too; otherwise a text consisting only of that mark would be
-- reduced to an empty string instead of being returned unchanged.
local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x656, 0x670)
-- Entry-name replacements, applied pairwise by stripDiacritics: each pattern
-- in `from` is replaced by the string at the same index in `to`, or by the
-- empty string (i.e. removed) when `to` has no entry at that index.
--   1. Replace alif wasla (U+0671) with plain alif (U+0627).
--   2. Remove tatweel (U+0640) and the diacritics U+064B to U+0652
--      (fathatan, dammatan, kasratan, fatha, damma, kasra, shadda, sukun),
--      U+0670 (superscript/dagger alef) and U+0656 (subscript alef).
local replacements = {
from = {U(0x0671), "[" .. U(0x0640, 0x064B) .. "-" .. U(0x0652, 0x0670, 0x0656) .. "]"},
to = {U(0x0627)},
}
local export = {}

-- Matches a text made up of exactly one of the listed diacritics, optionally
-- preceded by a single tatweel. Built once at load time.
local lone_diacritic = "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]$"

--- Strip Arabic diacritics and tatweel from `text`.
-- A text that is just the alif wasla character, or just a single diacritic
-- (optionally carried by a tatweel), is returned untouched so that those
-- characters are not normalised away. Otherwise every pattern in
-- `replacements.from` is applied in order, substituting the string at the
-- same index in `replacements.to`, or removing the match when there is none.
-- @param text string to process
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the processed string
function export.stripDiacritics(text, lang, sc)
	local is_bare_mark = text == waSla or find(text, lone_diacritic)
	if not is_bare_mark then
		local patterns, substitutes = replacements.from, replacements.to
		for idx = 1, #patterns do
			-- Assignment keeps only gsub's first result (the new string).
			text = gsub(text, patterns[idx], substitutes[idx] or "")
		end
	end
	return text
end

return export
Mga kategorya:
- Arabe na modyul
- Diacritic-stripping na modyul
- Arabe Yemeni na modyul
- Arabe Timog Levantino na modyul
- Karakhanid na modyul
- Arabe Hilagang Mesopotamiko na modyul
- Arabe Neyedi na modyul
- Arabe Golpo na modyul
- Arabe Tunesino na modyul
- Arabe Omani na modyul
- Arabe Dhofari na modyul
- Arabe Maroki na modyul
- Arabe Tajiki na modyul
- Arabe Sudanes na modyul
- Arabe Hijazi na modyul
- Arabe Libyo na modyul
- Bulgar na modyul
- Arabe Ehipsiyo na modyul
- Mozarabic na modyul
- Early Old Oghuz na modyul
- Arabe Uzbeki na modyul
- Old Ruthenian na modyul
- Arabe Hilagang Levantino na modyul
- Arabe Arhelino na modyul
- Arabe Baharna na modyul
- Mogholi na modyul
- Old Anatolian Turkish na modyul
- Arabe Iraqi na modyul
- Templates and modules needing documentation