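-- Wiki page header text (not Lua code), kept here as a comment:
--   "Pumunta sa nilalaman" (Jump to content)
--   Module:languages/chars
--   "Mula Wiksiyonaryo" (From Wiktionary)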


-- Module table; the fields assigned to it below are what this file returns.
local export = {}

-- Keep the table library and table.insert in locals for the code below.
local table = table
local insert = table.insert
-- Called below with numeric code points; its results are used as strings.
local u = require("Module:string/char")

-- Strings for some commonly-used diacritics and related characters, keyed by a
-- short name. Each value is produced by calling `u` on a Unicode code point.
-- The last two fields (`punc`, `diacritics`) are multi-character strings
-- rather than single characters.
local c = {
	prime				= u(0x02B9),
	-- Combining Diacritical Marks block (U+0300-U+036F).
	grave				= u(0x0300),
	acute				= u(0x0301),
	circ				= u(0x0302), -- circumflex
	tilde				= u(0x0303),
	macron				= u(0x0304),
	overline			= u(0x0305),
	breve				= u(0x0306),
	dotabove			= u(0x0307),
	diaer				= u(0x0308), -- diaeresis
	ringabove			= u(0x030A),
	dacute				= u(0x030B), -- double acute
	caron				= u(0x030C),
	lineabove			= u(0x030D),
	dgrave				= u(0x030F), -- double grave
	invbreve			= u(0x0311), -- inverted breve
	turnedcommaabove	= u(0x0312),
	commaabove			= u(0x0313),
	revcommaabove		= u(0x0314), -- reversed comma above
	dotbelow			= u(0x0323),
	diaerbelow			= u(0x0324), -- diaeresis below
	ringbelow			= u(0x0325),
	cedilla				= u(0x0327),
	ogonek				= u(0x0328),
	caronbelow			= u(0x032C),
	brevebelow			= u(0x032E),
	macronbelow			= u(0x0331),
	perispomeni			= u(0x0342),
	ypogegrammeni		= u(0x0345),
	CGJ					= u(0x034F), -- combining grapheme joiner
	zigzag				= u(0x035B),
	dbrevebelow			= u(0x035C), -- double breve below
	dmacron				= u(0x035E), -- double macron
	dtilde				= u(0x0360), -- double tilde
	dinvbreve			= u(0x0361), -- double inverted breve
	small_a				= u(0x0363),
	small_e				= u(0x0364),
	small_i				= u(0x0365),
	small_o				= u(0x0366),
	small_u				= u(0x0367),
	-- Greek signs.
	keraia				= u(0x0374),
	lowerkeraia			= u(0x0375),
	tonos				= u(0x0384),
	-- Cyrillic combining marks.
	palatalization		= u(0x0484),
	dasiapneumata		= u(0x0485),
	psilipneumata		= u(0x0486),
	-- Arabic-script marks.
	kashida				= u(0x0640),
	fathatan			= u(0x064B),
	dammatan			= u(0x064C),
	kasratan			= u(0x064D),
	fatha				= u(0x064E),
	damma				= u(0x064F),
	kasra				= u(0x0650),
	shadda				= u(0x0651),
	sukun				= u(0x0652),
	hamzaabove			= u(0x0654),
	nunghunna			= u(0x0658),
	zwarakay			= u(0x0659),
	smallv				= u(0x065A),
	superalef			= u(0x0670),
	-- Devanagari stress marks.
	udatta				= u(0x0951),
	anudatta			= u(0x0952),
	tacute				= u(0x1ACB), -- triple acute
	dottedgrave			= u(0x1DC0),
	dottedacute			= u(0x1DC1),
	-- Greek Extended spacing marks.
	coronis				= u(0x1FBD),
	psili				= u(0x1FBF),
	dasia				= u(0x1FFE), -- U+1FFE GREEK DASIA (previously 0x1FEF, which is GREEK VARIA)
	ZWNJ				= u(0x200C), -- zero width non-joiner
	ZWJ					= u(0x200D), -- zero width joiner
	RSQuo				= u(0x2019), -- right single quote
	kavyka				= u(0xA67C),
	VS01				= u(0xFE00), -- variation selector 1
	-- Punctuation for the standardChars field.
	-- Note: characters are literal (i.e. no magic characters).
	punc			= " ',-‐‑‒–—…∅",
	-- Range covering all diacritics, written as "first-last" pairs placed
	-- back to back. U+034F (CGJ) is deliberately skipped between the first
	-- two pairs.
	-- NOTE(review): presumably intended for use inside a pattern character
	-- class; confirm against callers.
	diacritics		= u(0x300) .. "-" .. u(0x34E) ..
						u(0x350) .. "-" .. u(0x36F) ..
						u(0x1AB0) .. "-" .. u(0x1ACE) ..
						u(0x1DC0) .. "-" .. u(0x1DFF) ..
						u(0x20D0) .. "-" .. u(0x20F0) ..
						u(0xFE20) .. "-" .. u(0xFE2F),
}
-- Every code point from U+2800 through U+28FF (Braille characters), joined
-- into one string for the standardChars field and stored as c.braille.
local braille_cells = {}
for codepoint = 0x2800, 0x28FF do
	braille_cells[#braille_cells + 1] = u(codepoint)
end
c.braille = table.concat(braille_cells)

-- Publish the finished character table.
export.chars = c

-- Private Use Area characters, generally used in sortkeys: entry i holds the
-- character at code point 0xF000 + (i - 1), for i = 1..32.
-- Note: if the limit needs to be increased, do so in powers of 2 (due to the
-- way memory is allocated for tables).
local pua = {}
for offset = 0, 31 do
	pua[offset + 1] = u(0xF000 + offset)
end
export.puaChars = pua

-- Named substitution data; exported further down as chars_substitutions.
local cs = {}

-- The two Grek entries below are used for the default display_text and
-- strip_diacritics for Grek, but parts are also used directly by Albanian (sq).

-- Character class matching the apostrophe-like characters.
-- Not tonos: used as the numeral sign in entries.
local grek_apostrophes = "['ʼ" .. c.RSQuo .. c.prime .. c.keraia .. c.coronis .. c.psili .. "]"

-- One `from` list, shared (same table object) by both Grek entries.
local grek_from = {"Þ", "þ", c.turnedcommaabove, grek_apostrophes}

cs["Grek-displaytext"] = {
	from = grek_from,
	to = {"Ϸ", "ϸ", c.revcommaabove, c.RSQuo},
}

-- Same `from` patterns as the display text, but the apostrophe class maps to
-- a plain ASCII apostrophe and three combining marks are listed for removal.
cs["Grek-stripdiacritics"] = {
	remove_diacritics = c.caron .. c.diaerbelow .. c.brevebelow,
	from = grek_from,
	to = {"Ϸ", "ϸ", c.revcommaabove, "'"},
}

-- Diacritics string used in the default strip_diacritics and sort_key for
-- Cyrs, but also used directly by Old Ruthenian (zle-ort). The marks are
-- joined in the order listed.
cs.Cyrs_remove_diacritics = table.concat({
	c.grave,
	c.acute,
	c.dotabove,
	c.diaer,
	c.invbreve,
	c.palatalization,
	c.dasiapneumata,
	c.psilipneumata,
	c.dottedgrave,
	c.dottedacute,
	c.kavyka,
})

-- Publish the substitution data.
export.chars_substitutions = cs

return export