Module:languages/chars
Itsura
- This module lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- Table returned at the end of this module; the code below attaches the
-- `chars`, `puaChars` and `chars_substitutions` fields to it.
local export = {}
-- Keep the standard table library and table.insert in locals.
local table = table
local insert = table.insert
-- Called below as u(codepoint) to build every character string.
-- NOTE(review): presumably returns the UTF-8 encoding of the given code point;
-- the implementation lives in Module:string/char - confirm there.
local u = require("Module:string/char")
-- UTF-8 encoded strings for some commonly-used diacritics.
-- Each field maps a short mnemonic name to the string built by u() from a
-- single Unicode code point. Entries are listed in code point order.
local c = {
	prime = u(0x02B9),
	grave = u(0x0300),
	acute = u(0x0301),
	circ = u(0x0302), -- circumflex
	tilde = u(0x0303),
	macron = u(0x0304),
	overline = u(0x0305),
	breve = u(0x0306),
	dotabove = u(0x0307),
	diaer = u(0x0308), -- diaeresis
	ringabove = u(0x030A),
	dacute = u(0x030B), -- double acute
	caron = u(0x030C),
	lineabove = u(0x030D),
	dgrave = u(0x030F), -- double grave
	invbreve = u(0x0311), -- inverted breve
	turnedcommaabove = u(0x0312),
	commaabove = u(0x0313),
	revcommaabove = u(0x0314), -- reversed comma above
	dotbelow = u(0x0323),
	diaerbelow = u(0x0324), -- diaeresis below
	ringbelow = u(0x0325),
	cedilla = u(0x0327),
	ogonek = u(0x0328),
	caronbelow = u(0x032C),
	brevebelow = u(0x032E),
	macronbelow = u(0x0331),
	perispomeni = u(0x0342),
	ypogegrammeni = u(0x0345),
	CGJ = u(0x034F), -- combining grapheme joiner
	zigzag = u(0x035B),
	dbrevebelow = u(0x035C), -- double breve below
	dmacron = u(0x035E), -- double macron
	dtilde = u(0x0360), -- double tilde
	dinvbreve = u(0x0361), -- double inverted breve
	small_a = u(0x0363),
	small_e = u(0x0364),
	small_i = u(0x0365),
	small_o = u(0x0366),
	small_u = u(0x0367),
	keraia = u(0x0374),
	lowerkeraia = u(0x0375),
	tonos = u(0x0384),
	palatalization = u(0x0484),
	dasiapneumata = u(0x0485),
	psilipneumata = u(0x0486),
	kashida = u(0x0640),
	fathatan = u(0x064B),
	dammatan = u(0x064C),
	kasratan = u(0x064D),
	fatha = u(0x064E),
	damma = u(0x064F),
	kasra = u(0x0650),
	shadda = u(0x0651),
	sukun = u(0x0652),
	hamzaabove = u(0x0654),
	nunghunna = u(0x0658),
	zwarakay = u(0x0659),
	smallv = u(0x065A),
	superalef = u(0x0670),
	udatta = u(0x0951),
	anudatta = u(0x0952),
	tacute = u(0x1ACB), -- triple acute
	dottedgrave = u(0x1DC0),
	dottedacute = u(0x1DC1),
	coronis = u(0x1FBD),
	psili = u(0x1FBF),
	dasia = u(0x1FFE), -- GREEK DASIA; U+1FEF is GREEK VARIA, not dasia
	ZWNJ = u(0x200C), -- zero width non-joiner
	ZWJ = u(0x200D), -- zero width joiner
	RSQuo = u(0x2019), -- right single quote
	kavyka = u(0xA67C),
	VS01 = u(0xFE00), -- variation selector 1
	-- Punctuation for the standardChars field.
	-- Note: characters are literal (i.e. no magic characters).
	punc = " ',-‐‑‒–—…∅",
	-- Range covering all diacritics.
	-- Built as consecutive "first-last" pairs; U+034F (the CGJ above) falls
	-- between the first two pairs and is therefore not part of the range.
	-- NOTE(review): the "-" separators suggest this is meant to be placed
	-- inside a pattern character class - confirm against callers.
	diacritics = u(0x300) .. "-" .. u(0x34E) ..
		u(0x350) .. "-" .. u(0x36F) ..
		u(0x1AB0) .. "-" .. u(0x1ACE) ..
		u(0x1DC0) .. "-" .. u(0x1DFF) ..
		u(0x20D0) .. "-" .. u(0x20F0) ..
		u(0xFE20) .. "-" .. u(0xFE2F),
}
-- Braille characters for the standardChars field: one string per code point
-- from 0x2800 through 0x28FF, collected in order and then joined.
local braille = {}
for codepoint = 0x2800, 0x28FF do
	braille[#braille + 1] = u(codepoint)
end
c.braille = table.concat(braille)
-- Publish the finished character table.
export.chars = c
-- PUA characters, generally used in sortkeys.
-- Entry i holds the character at code point 0xF000 + (i - 1), for i = 1..32.
-- Note: if the limit needs to be increased, do so in powers of 2 (due to the way memory is allocated for tables).
local p = {}
for offset = 0, 31 do
	p[offset + 1] = u(0xF000 + offset)
end
export.puaChars = p
-- Named substitution data, exported as `chars_substitutions`.
local cs = {}

-- Used for the default display_text and strip_diacritics for Grek, but parts also used directly by Albanian (sq).
-- Both Grek entries below hold this very same `from` table (one shared reference).
local grek_from = {
	"Þ",
	"þ",
	c.turnedcommaabove,
	-- Not tonos: used as the numeral sign in entries.
	"['ʼ" .. c.RSQuo .. c.prime .. c.keraia .. c.coronis .. c.psili .. "]",
}

cs["Grek-displaytext"] = {
	from = grek_from,
	to = {
		"Ϸ",
		"ϸ",
		c.revcommaabove,
		c.RSQuo,
	},
}

cs["Grek-stripdiacritics"] = {
	remove_diacritics = c.caron .. c.diaerbelow .. c.brevebelow,
	from = grek_from,
	to = {
		"Ϸ",
		"ϸ",
		c.revcommaabove,
		"'",
	},
}

-- Used in the default strip_diacritics and sort_key for Cyrs, but also used directly by Old Ruthenian (zle-ort).
cs["Cyrs_remove_diacritics"] = table.concat({
	c.grave,
	c.acute,
	c.dotabove,
	c.diaer,
	c.invbreve,
	c.palatalization,
	c.dasiapneumata,
	c.psilipneumata,
	c.dottedgrave,
	c.dottedacute,
	c.kavyka,
})

export.chars_substitutions = cs

return export