Pumunta sa nilalaman

Module:headword/data

Mula Wiksiyonaryo

This module contains data used by Module:headword and Module:headword/templates.

Most tables in section 1 are written as lists and converted into sets at the point of definition (via `listToSet` from Module:table), so that they contain [item1] = true, [item2] = true, ....


-- Name of the module that performs page-wide processing; it is required at the bottom of this file.
local headword_page_module = "Module:headword/page"

-- Helper from Module:table used to turn the list literals below into sets.
local list_to_set = require("Module:table").listToSet

-- Table returned by this module; every setting below is stored as a field on it.
local data = {}

------ 1. Lists which are converted into sets. ------

--[==[ var:
Large pages where we disable label tracking, red link checking and similar. Both `a` and `A` are listed separately, so
lookups are presumably case-sensitive (to be confirmed against the consumer of this table).
]==]
data.large_pages = list_to_set {
	-- pages that consistently hit timeouts
	"a",
	-- pages that sometimes hit timeouts
	"A",
	"baba",
	"de",
	"e",
	"i",
	"lima",
	"o",
	"u",
	"и",
	"山",
	"子",
	"月",
	"一",
	"人",
}

--[==[ var:
Map from singular to plural, and from plural to itself, for recognized parts of speech with irregular plurals. Most of
these are invariable plurals, e.g. `kanji` is its own plural; but we also have `mora` plural `morae`. The list below
holds only the invariable ones; pairs whose singular and plural differ are added by the loop that follows this table.
]==]
data.irregular_plurals = list_to_set({
	"cmavo",
	"cmene",
	"fu'ivla",
	"gismu",
	"Han tu",
	"hanja",
	"hanzi",
	"jyutping",
	"kana",
	"kanji",
	"lujvo",
	"phrasebook",
	"pinyin",
	"rafsi",
}, function(_, item)
	-- Returns the item itself; per the description above this makes every listed term map to itself instead of to
	-- `true`. NOTE(review): confirm the callback contract against Module:table's listToSet.
	return item
end)

local irregular_plurals = data.irregular_plurals

-- Plurals that differ from their singular, plus any regular plurals whose singular ends in "s" (the module treats
-- inputs ending in "s" as already plural). Each pair is registered under both its singular and its plural, because
-- the module generates a default plural for any input that is not a key of this table.
for singular, plural in pairs {
	mora = "morae",
} do
	irregular_plurals[singular] = plural
	irregular_plurals[plural] = plural
end

--[==[ var:
Recognized lemmas. If the part of speech in {{tl|head}} is set to one of these or its singular equivalent, the category
'LANG lemmas' will automatically be added. If the part of speech is not a singular or plural lemma or non-lemma form and
is not an abbreviation that expands to a recognized lemma or non-lemma form, the page will be added to various tracking
categories:
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/LANG]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS/LANG]]

Entries followed by a quoted comment are Tagalog names; the comment gives the English name they replace. Entries without
such a comment are still in English.
]==]
data.lemmas = list_to_set{
	"daglat", -- "abbreviations"
	"akronim", -- "acronyms"
	"pang-uri", -- "adjectives"
	"adnominal", -- "adnominals"
	"adpositions",
	"pang-abay", -- "adverbs"
	"panlapi", -- "affixes"
	"ambipositions",
	-- "articles". Because Tagalog does not separate articles from determiners, determiners use "pantukoy" while
	-- articles use "artikulo".
	"artikulo",
	"kabilaang lapi", -- "circumfixes"
	"circumpositions",
	"classifiers",
	"cmavo",
	"cmavo clusters",
	"cmene",
	"combining forms",
	"pangatnig", -- "conjunctions"
	"counters",
	"pantukoy", -- "determiners"
	"tuldik", -- "diacritical marks"
	"digrapo", -- "digraphs"
	-- "equative adjectives". NOTE(review): the same string is also listed in data.nonlemmas; confirm which list
	-- should own it.
	"pang-uring magkatulad",
	"fu'ivla",
	"gismu",
	"titik-Tsino", -- "Han characters"
	"Han tu",
	"hanja",
	"hanzi",
	"ideophones",
	"sawikain", -- "idioms"
	"gitlapi", -- "infixes"
	"inisyals", -- "initialisms"
	"iteration marks",
	"interfixes",
	"pandamdam", -- "interjections"
	"kana",
	"kanji",
	"titik", -- "letters"
	"pang-angkop", -- "ligatures"
	"logogramo", -- "logograms"
	"lujvo",
	"morae",
	"morpema", -- "morphemes"
	"non-constituents",
	"pangngalan", -- "nouns"
	"bilang", -- "numbers"
	"simbolong pambilang", -- "numeral symbols"
	"pambilang", -- "numerals"
	"kataga", -- "particles"
	"parirala", -- "phrases"
	"postpositions",
	"postpositional phrases",
	"predicatives",
	"unlapi", -- "prefixes"
	"malapang-ukol na parirala", -- "prepositional phrases"
	"pang-ukol", -- "prepositions"
	"preverbs",
	"pang-abay na makahalip", -- "pronominal adverbs"
	"panghalip", -- "pronouns"
	"pangngalang pantangi", -- "proper nouns"
	"salawikain", -- "proverbs"
	"bantas", -- "punctuation marks"
	"pamanggit", -- "relatives"
	"ugat", -- "roots"
	"tangkay", -- "stems"
	"hulapi", -- "suffixes"
	"pantig", -- "syllables"
	"simbolo", -- "symbols"
	"pandiwa", -- "verbs"
}

--[==[ var:
Recognized non-lemma forms. If the part of speech in {{tl|head}} is set to one of these or its singular equivalent, the
category 'LANG non-lemma forms' will automatically be added. If the part of speech is not a singular or plural lemma or
non-lemma form and is not an abbreviation that expands to a recognized lemma or non-lemma form, the page will be added
to various tracking categories; see the documentation of `data.lemmas`.

Entries followed by a quoted comment are Tagalog names; the comment gives the English name they replace. Entries without
such a comment are still in English.
]==]
data.nonlemmas = list_to_set{
	"anyong pandiwaring tahasan", -- "active participle forms"
	"pandiwaring tahasan", -- "active participles"
	"pandiwaring malapang-uri", -- "adjectival participles"
    "anyong pang-uring kaukulan", -- "adjective case forms"
	"anyong pang-uri", -- "adjective forms"
	"anyong pang-uring pambabae", -- "adjective feminine forms"
	"anyong pang-uring maramihan", -- "adjective plural forms"
	"anyong pang-abay", -- "adverb forms"
	"pandiwaring malapang-abay", -- "adverbial participles"
	"agent participles",
	"anyong artikulo", -- "article forms"
	"anyong kabilaang lapi", -- "circumfix forms"
	"combined forms",
	"anyong pang-uring pahambing", -- "comparative adjective forms"
	"pang-uring pahambing", -- "comparative adjectives"
	"anyong pang-abay na pahambing", -- "comparative adverb forms"
	"pang-abay na pahambing", -- "comparative adverbs"
	"anyong pangatnig", -- "conjunction forms"
	"may-angkop", -- "contractions"
	"converbs",
	"anyong pantukoy na pahambing", -- "determiner comparative forms"
	"anyong pantukoy", -- "determiner forms"
	"anyong pantukoy na pasukdol", -- "determiner superlative forms"
	"pangngalang paliit", -- "diminutive nouns"
	"pang-uring palamang", -- "elative adjectives"
	"anyong pang-uring magkatulad", -- "equative adjective forms"
	"pang-uring magkatulad", -- "equative adjectives" (also listed in data.lemmas)
	"pandiwaring panghinaharap", -- "future participles"
	"herundiyo", -- "gerunds"
	"anyong pawatas", -- "infinitive forms"
	"pawatas", -- "infinitives"
	"anyong pandamdam", -- "interjection forms"
	"jyutping",
	"maling baybay", -- "misspellings"
	"pandiwaring negatibo", -- "negative participles"
	"pandiwaring makangalan", -- "nominal participles"
	"anyong pangngalang kaukulan", -- "noun case forms"
	"noun construct forms",
	"anyong pangngalang dalwahan", -- "noun dual forms"
	"anyong pangngalan", -- "noun forms"
	"anyong pangngalang pangkaunti", -- "noun paucal forms"
	"anyong pangngalang maramihan", -- "noun plural forms"
	"anyong pangngalang paari", -- "noun possessive forms"
	"anyong pangngalang isahan", -- "noun singulative forms"
	"anyong pambilang", -- "numeral forms"
	"pandiwari", -- "participles"
	"anyong pandiwari", -- "participle forms"
	"anyong kataga", -- "particle forms"
	"pandiwaring balintiyak", -- "passive participles"
	"pandiwaring pangnagdaang tahasan", -- "past active participles"
	"pandiwaring pangnagdaang malapang-abay", -- "past adverbial participles"
	"pandiwaring pangnagdaan", -- "past participles"
	"anyong pandiwaring pangnagdaan", -- "past participle forms"
	-- "past passive participles". NOTE(review): unlike the sibling participle entries this one carries the
	-- "anyong" prefix used elsewhere in this list for "... forms"; confirm whether that is intended.
	"anyong pandiwaring pangnagdaang balintiyak",
	"pandiwaring panggitnang tahasan", -- "perfect active participles"
	"pandiwaring panggitna", -- "perfect participles"
	"pandiwaring panggitnang balintiyak", -- "perfect passive participles"
	"pinyin",
	"maramihan", -- "plurals"
	"postposition forms",
	"anyong unlapi", -- "prefix forms"
	"pang-ukol na tinipil", -- "preposition contractions"
	"anyong pang-ukol", -- "preposition forms"
	"panghalip na malapang-ukol", -- "prepositional pronouns"
	"pandiwaring pangkasalukuyang tahasan", -- "present active participles"
	"pandiwaring pangkasalukuyang malapang-abay", -- "present adverbial participles"
	-- "present participles". NOTE(review): carries the "anyong" prefix although the gloss is not a "... forms"
	-- entry; confirm whether that is intended.
	"anyong pandiwaring pangkasalukuyan",
	"pandiwaring pangkasalukuyang balintiyak", -- "present passive participles"
	"preverb forms",
	"anyong panghalip", -- "pronoun forms"
	"anyong panghalip paari", -- "pronoun possessive forms"
	"anyong pangngalang pantangi", -- "proper noun forms"
	"anyong pangngalang pantanging maramihan", -- "proper noun plural forms"
	"rafsi",
	"romanisasyon", -- "romanizations"
	"anyong ugat", -- "root forms"
	"isahan", -- "singulatives"
	"anyong hulapi", -- "suffix forms"
	"anyong pang-uring pasukdol", -- "superlative adjective forms"
	"pang-uring pasukdol", -- "superlative adjectives"
	"anyong pang-abay na pasukdol", -- "superlative adverb forms"
	"pang-abay na pasukdol", -- "superlative adverbs"
	"anyong pandiwa", -- "verb forms"
	"pangngalang makadiwa", -- "verbal nouns"
}

--[==[ var:
Language codes whose headwords are not split into separately linked parts.
]==]
data.no_multiword_links = list_to_set({ "zh" })

--[==[ var:
List of languages that will not have `LANG multiword terms` categories added. There are various reasons why languages
are in this list: (a) words are written without spaces between them; (b) syllables are written with spaces between them;
(c) variant reconstructions are notated with a tilde surrounded by spaces; (d) the language is a sign language, where
pagenames are multiword descriptions of the gesture(s) required to make an individual sign; (e) some other weirdnesses.
Entries that are commented out and marked "not sure" are candidates that have not been enabled.
]==]
data.no_multiword_cat = list_to_set{
	-------- Languages without spaces between words (sometimes spaces between phrases) --------
	"blt", -- Tai Dam
	"ja", -- Japanese
	"khb", -- Lü
	"km", -- Khmer
	"lo", -- Lao
	"mnw", -- Mon
	"my", -- Burmese
	"nan", -- Min Nan (some words in Latin script; hyphens between syllables)
	"nan-hbl", -- Hokkien (some words in Latin script; hyphens between syllables)
	"nod", -- Northern Thai
	"ojp", -- Old Japanese
	"shn", -- Shan
	"sou", -- Southern Thai
	"tdd", -- Tai Nüa
	"th", -- Thai
	"tts", -- Isan
	"twh", -- Tai Dón
	"txg", -- Tangut
	"zh", -- Chinese (all varieties with Chinese characters)
	"zkt", -- Khitan

	-------- Languages with spaces between syllables --------
	"ahk", -- Akha
	"aou", -- A'ou
	"atb", -- Zaiwa
	"byk", -- Biao
	"cdy", -- Chadong
	--"duu", -- Drung; not sure
	--"hmx-pro", -- Proto-Hmong-Mien
	--"hnj", -- Green Hmong; not sure
	"huq", -- Tsat
	"ium", -- Iu Mien
	--"lis", -- Lisu; not sure
	"mtq", -- Muong
	--"mww", -- White Hmong; not sure
	"onb", -- Lingao
	--"sit-gkh", -- Gokhy; not sure
	--"swi", -- Sui; not sure
	"tbq-lol-pro", -- Proto-Loloish
	"tdh", -- Thulung
	"ukk", -- Muak Sa-aak
	"vi", -- Vietnamese
	"yig", -- Wusa Nasu
	"zng", -- Mang

	-------- Languages with ~ with surrounding spaces used to separate variants --------
	"mkh-ban-pro", -- Proto-Bahnaric
	"sit-pro", -- Proto-Sino-Tibetan (this is its only occurrence in the table)

	-------- Other weirdnesses --------
	"mul", -- Translingual; gestures, Morse code, etc.
	"aot", -- Atong (India); bullet is a letter

	-------- All sign languages --------
	"ads",
	"aed",
	"aen",
	"afg",
	"ase",
	"asf",
	"asp",
	"asq",
	"asw",
	"bfi",
	"bfk",
	"bog",
	"bqn",
	"bqy",
	"bvl",
	"bzs",
	"cds",
	"csc",
	"csd",
	"cse",
	"csf",
	"csg",
	"csl",
	"csn",
	"csq",
	"csr",
	"doq",
	"dse",
	"dsl",
	"ecs",
	"esl",
	"esn",
	"eso",
	"eth",
	"fcs",
	"fse",
	"fsl",
	"fss",
	"gds",
	"gse",
	"gsg",
	"gsm",
	"gss",
	"gus",
	"hab",
	"haf",
	"hds",
	"hks",
	"hos",
	"hps",
	"hsh",
	"hsl",
	"icl",
	"iks",
	"ils",
	"inl",
	"ins",
	"ise",
	"isg",
	"isr",
	"jcs",
	"jhs",
	"jls",
	"jos",
	"jsl",
	"jus",
	"kgi",
	"kvk",
	"lbs",
	"lls",
	"lsl",
	"lso",
	"lsp",
	"lst",
	"lsy",
	"lws",
	"mdl",
	"mfs",
	"mre",
	"msd",
	"msr",
	"mzc",
	"mzg",
	"mzy",
	"nbs",
	"ncs",
	"nsi",
	"nsl",
	"nsp",
	"nsr",
	"nzs",
	"okl",
	"pgz",
	"pks",
	"prl",
	"prz",
	"psc",
	"psd",
	"psg",
	"psl",
	"pso",
	"psp",
	"psr",
	"pys",
	"rms",
	"rsl",
	"rsm",
	"sdl",
	"sfb",
	"sfs",
	"sgg",
	"sgx",
	"slf",
	"sls",
	"sqk",
	"sqs",
	"ssp",
	"ssr",
	"svk",
	"swl",
	"syy",
	"tse",
	"tsm",
	"tsq",
	"tss",
	"tsy",
	"tza",
	"ugn",
	"ugy",
	"ukl",
	"uks",
	"vgt",
	"vsi",
	"vsl",
	"vsv",
	"xki",
	"xml",
	"xms",
	"ygs",
	"ysl",
	"zib",
	"zsl",
}

--[==[ var:
List of languages where a hyphen is not considered a word separator for the `LANG multiword terms` category. Languages
are listed for differing reasons; the reason for inclusion should be noted in the comment next to each language code.
]==]
data.hyphen_not_multiword_sep = list_to_set{
	"akk", -- Akkadian; hyphens between syllables
	"akl", -- Aklanon; hyphens for mid-word glottal stops
	"ber-pro", -- Proto-Berber; morphemes separated by hyphens
	"ceb", -- Cebuano; hyphens for mid-word glottal stops
	"cnk", -- Khumi Chin; hyphens used in single words
	"cpi", -- Chinese Pidgin English; Chinese-derived words with hyphens between syllables
	"de", -- German; too many false positives
	"esx-esk-pro", -- hyphen used to separate morphemes (language name not recorded here)
	"fi", -- Finnish; hyphen used to separate components in compound words if the final and initial vowels match, respectively
	"gd", -- Scottish Gaelic; too many false positives like [[a-chianaibh]], [[a-nìos]], [[an-dè]] and other adverbs in a- and an-
	"hil", -- Hiligaynon; hyphens for mid-word glottal stops
	"hnn", -- Hanunoo; too many false positives
	"ilo", -- Ilocano; hyphens for mid-word glottal stops
	"kne", -- Kankanaey; hyphens for mid-word glottal stops
	"lcp", -- Western Lawa; dash as syllable joiner
	"lwl", -- Eastern Lawa; dash as syllable joiner
	"mfa", -- Pattani Malay in Thai script; dash as syllable joiner
	"mkh-vie-pro", -- Proto-Vietic; morphemes separated by hyphens
	"msb", -- Masbatenyo; too many false positives
	"tl", -- Tagalog; too many false positives
	"war", -- Waray-Waray; too many false positives
	"yo", -- Yoruba; hyphens used to show lengthened nasal vowels
}

--[==[ var:
Language codes for which `LANG masculine nouns` and similar gender categories are suppressed. These are generally
languages that have no grammatical gender but reuse the gender field for some other purpose. (This is a massive hack
and should be changed.)
]==]
data.no_gender_cat = list_to_set({ "ja", "th" })

--[==[ var:
List of languages where [[Module:headword]] should not attempt to generate a transliteration even if the term is written
in a non-Latin script. FIXME: No reason is recorded for any entry yet; add a comment next to each language code
explaining why it is in this list.
]==]
data.notranslit = list_to_set{
	"ams",
	"az",
	"bbc",
	"bug",
	"cdo",
	"cia",
	"cjm",
	"cjy",
	"cmn",
	"cnp",
	"cpi",
	"cpx",
	"csp",
	"czh",
	"czo",
	"gan",
	"hak",
	"hnm",
	"hsn",
	"ja",
	"kzg",
	"lad",
	"ltc",
	"luh",
	"lzh",
	"mnp",
	"ms",
	"mul",
	"mvi",
	"nan",
	"nan-dat",
	"nan-hbl",
	"nan-hlh",
	"nan-lnx",
	"nan-tws",
	"nan-zhe",
	"nan-zsh",
	"och",
	"oj",
	"okn",
	"ryn",
	"rys",
	"ryu",
	"sh",
	"sjc",
	"tgt",
	"th",
	"tkn",
	"tly",
	"txg",
	"und",
	"vi",
	"wuu",
	"xug",
	"yoi",
	"yox",
	"yue",
	"za",
	"zh",
	"zhx-sic",
	"zhx-tai",
}

--[==[ var:
List of languages that will default to `sccat` being true, i.e. categories like `LANG POS in SCRIPT script` will
automatically be generated. This can be overridden using {{para|sccat|0}} in {{tl|head}} or setting `sccat` to
`false` in Lua. The codes below are not in strict alphabetical order; since the list is converted into a set, the
order is presumably irrelevant.
]==]
data.default_sccat = list_to_set{
	"inc-apa",
	"inc-ash",
	"kfr",
	"ks",
	"mr",
	"mwr",
	"inc-oaw",
	"inc-ohi",
	"omr",
	"inc-opa",
	"phr",
	"pi",
	"pra",
	"sa",
	"skr",
	"sd",
}

--[==[ var:
List of script codes for which a script-tagged display title will be added.

Codes indented one level deeper are grouped under the script code that precedes them (e.g. `fa-Arab` under `Arab`).
The indentation is purely visual: every entry ends up as a key of the same flat set. The final three codes are set
apart from the alphabetical run by a blank line.
]==]
data.toBeTagged = list_to_set{
	"Ahom",
	"Arab",
		"fa-Arab",
		"glk-Arab",
		"kk-Arab",
		"ks-Arab",
		"ku-Arab",
		"mzn-Arab",
		"ms-Arab",
		"ota-Arab",
		"pa-Arab",
		"ps-Arab",
		"sd-Arab",
		"tt-Arab",
		"ug-Arab",
		"ur-Arab",
	"Armi",
	"Armn",
	"Avst",
	"Bali",
	"Bamu",
	"Batk",
	"Beng",
		"as-Beng",
	"Bopo",
	"Brah",
	"Brai",
	"Bugi",
	"Buhd",
	"Cakm",
	"Cans",
	"Cari",
	"Cham",
	"Cher",
	"Copt",
	"Cprt",
	"Cyrl",
	"Cyrs",
	"Deva",
	"Dsrt",
	"Egyd",
	"Egyp",
	"Ethi",
	"Geok",
	"Geor",
	"Glag",
	"Goth",
	"Grek",
		"Polyt",
		"polytonic",
	"Gujr",
	"Guru",
	"Hang",
	"Hani",
	"Hano",
	"Hebr",
	"Hira",
	"Hluw",
	"Ital",
	"Java",
	"Kali",
	"Kana",
	"Khar",
	"Khmr",
	"Knda",
	"Kthi",
	"Lana",
	"Laoo",
	"Latn",
		"Latf",
		"Latg",
		"Latnx",
		"Latinx",
		"pjt-Latn",
	"Lepc",
	"Limb",
	"Linb",
	"Lisu",
	"Lyci",
	"Lydi",
	"Mand",
	"Mani",
	"Marc",
	"Merc",
	"Mero",
	"Mlym",
	"Mong",
		"mnc-Mong",
		"sjo-Mong",
		"xwo-Mong",
	"Mtei",
	"Mymr",
	"Narb",
	"Nkoo",
	"Nshu",
	"Ogam",
	"Olck",
	"Orkh",
	"Orya",
	"Osma",
	"Ougr",
	"Palm",
	"Phag",
	"Phli",
	"Phlv",
	"Phnx",
	"Plrd",
	"Prti",
	"Rjng",
	"Runr",
	"Samr",
	"Sarb",
	"Saur",
	"Sgnw",
	"Shaw",
	"Shrd",
	"Sinh",
	"Sora",
	"Sund",
	"Sylo",
	"Syrc",
	"Tagb",
	"Tale",
	"Talu",
	"Taml",
	"Tang",
	"Tavt",
	"Telu",
	"Tfng",
	"Tglg",
	"Thaa",
	"Thai",
	"Tibt",
	"Ugar",
	"Vaii",
	"Xpeo",
	"Xsux",
	"Yiii",
	"Zmth",
	"Zsym",

	"Ipach",
	"Music",
	"Rumin",
}

--[==[ var:
Parts of speech which will not be categorised in categories like `English terms spelled with É` if the term is the
character in question (e.g. the letter entry for English [[é]]). This contrasts with entries like the French adjective
[[m̂]], which is a one-letter word spelled with the letter.

Entries followed by a quoted comment are Tagalog names; the comment gives the English name they replace.
]==]
data.pos_not_spelled_with_self = list_to_set{
	"tuldik", -- "diacritical marks"
	"titik-Tsino", -- "Han characters"
	"Han tu",
	"hanja",
	"hanzi",
	"iteration marks",
	"kana",
	"kanji",
	"titik", -- "letters"
	"pang-angkop", -- "ligatures"
	"logogramo", -- "logograms"
	"morae",
	"simbolong pambilang", -- "numeral symbols"
	"pambilang", -- "numerals"
	"bantas", -- "punctuation marks"
	"pantig", -- "syllables"
	"simbolo", -- "symbols"
}

------ 2. Lists not converted into sets. ------

--[==[ var:
Recognized aliases for parts of speech (param 2=). Key is the short form and value is the canonical singular (not
pluralized) form. It is singular so the same table can be used in [[Module:form of]] for the {{para|p}}/{{para|POS}}
param and [[Module:links]] for the pos= param. Note that any part of speech, abbreviated or not, can be suffixed with
`f` to generate the corresponding non-lemma form part of speech, such as `adjf`, `af` or `adjectivef` for
`adjective form`, and `nounf` or `nf` for `noun form`. This expansion happens even when it does not make sense for the
given part of speech (e.g. `pclf` expands to `particle form` and `symf` expands to `symbol form`), and currently also,
at least in [[Module:headword]] (but not [[Module:links]]), even if the part before the `f` is not a recognized part of
speech or abbreviation (hence `nerf` expands to `ner form`).

The table has three groups: short abbreviations (including the three-letter ones marked as diksiyonaryo.ph
abbreviations), English lemma names redirected to their Tagalog equivalents, and English non-lemma names redirected to
their Tagalog equivalents. Commented-out strings in the last group are English names that have no Tagalog alias here.
]==]
data.pos_aliases = {
	a = "pang-uri",
	adj = "pang-uri",
	pnr = "pang-uri", -- diksiyonaryo.ph abbr
	adv = "pang-abay",
	pnb = "pang-abay", -- diksiyonaryo.ph abbr
	art = "artikulo",
	det = "pantukoy",
	pnt = "pantukoy", -- diksiyonaryo.ph abbr
	compadj = "pang-uring pahambing",
	compadv = "pang-abay na pahambing",
	conj = "pangatnig",
	conv = "converb",
	int = "pandamdam",
	interj = "pandamdam",
	intj = "pandamdam",
	pdd = "pandamdam", -- diksiyonaryo.ph abbr
	n = "pangngalan",
	png = "pangngalan", -- diksiyonaryo.ph abbr
	-- the next two support Algonquian languages; see also vii/vai/vti/vta below
	na = "animate noun",
	ni = "inanimate noun",
	num = "pambilang",
	part = "pandiwari",
	pcl = "kataga",
	phr = "parirala",
	pn = "pangngalang pantangi",
	postp = "postposition",
	pref = "unlapi",
	prep = "pang-ukol",
	prepphr = "malapang-ukol na parirala",
	-- Present participle. Uses the same value as the ["present participles"] alias below, which is the string that
	-- data.nonlemmas recognizes; the previous value was a past-participle wording that matched no recognized part
	-- of speech.
	prespart = "anyong pandiwaring pangkasalukuyan",
	pron = "panghalip",
	pnh = "panghalip", -- diksiyonaryo.ph abbr
	prop = "pangngalang pantangi",
	proper = "pangngalang pantangi",
	propn = "pangngalang pantangi",
	rom = "romanisasyon",
	romanisation = "romanisasyon",
	romanisations = "romanisasyon",
	suf = "hulapi",
	supadj = "pang-uring pasukdol",
	supadv = "pang-abay na pasukdol",
	sym = "simbolo",
	v = "pandiwa",
	vb = "pandiwa",
	pnd = "pandiwa", -- diksiyonaryo.ph abbr
	vi = "pandiwang katawanin",
	vt = "pandiwang palipat",
	-- the next four support Algonquian languages
	vii = "inanimate intransitive verb",
	vai = "animate intransitive verb",
	vti = "transitive inanimate verb",
	vta = "transitive animate verb",
	pnl = "panlapi", -- diksiyonaryo.ph abbr

	-- redirect EN to TL
	abbreviation = "daglat",
	acronym = "akronim",
	adjective = "pang-uri",
	adverb = "pang-abay",
	article = "artikulo",
	circumfix = "kabilaang lapi",
	["comparative adjective"] = "pang-uring pahambing",
	["comparative adverb"] = "pang-abay na pahambing",
	conjunction = "pangatnig",
	determiner = "pantukoy",
	["diacritical mark"] = "tuldik",
	digraph = "digrapo",
	["equative adjective"] = "pang-uring magkatulad",
	["Han character"] = "titik-Tsino",
	["idiom"] = "sawikain",
	infix = "gitlapi",
	initialism = "inisyals",
	interjection = "pandamdam",
	letter = "titik",
	logograms = "logogramo",
	morpheme = "morpema",
	noun = "pangngalan",
	number = "bilang",
	numeral = "pambilang",
	["numeral symbols"] = "simbolong pambilang",
	participle = "pandiwari",
	particle = "kataga",
	phrase = "parirala",
	["proper noun"] = "pangngalang pantangi",
	prefix = "unlapi",
	preposition = "pang-ukol",
	["prepositional phrase"] = "malapang-ukol na parirala",
	["pronominal adverbs"] = "pang-abay na makahalip",
	pronoun = "panghalip",
	proverb = "salawikain",
	["punctuation marks"] = "bantas",
	romanization = "romanisasyon",
	relatives = "pamanggit",
	root = "ugat",
	stem = "tangkay",
	suffix = "hulapi",
	["superlative adjective"] = "pang-uring pasukdol",
	["superlative adverb"] = "pang-abay na pasukdol",
	syllable = "pantig",
	symbol = "simbolo",
	verb = "pandiwa",
	["intransitive verb"] = "pandiwang katawanin",
	["transitive verb"] = "pandiwang palipat",
	["transitive and intransitive verb"] = "pandiwang palipat at katawanin",

	-- nonlemmas
	["active participle forms"] = "anyong pandiwaring tahasan",
	["active participles"] = "pandiwaring tahasan",
	["adjectival participles"] = "pandiwaring malapang-uri",
	["adjective case form"] = "anyong pang-uring kaukulan",
	["adjective form"] = "anyong pang-uri",
	["adjective feminine form"] = "anyong pang-uring pambabae",
	["adjective plural form"] = "anyong pang-uring maramihan",
	["adverb form"] = "anyong pang-abay",
	["adverbial participles"] = "pandiwaring malapang-abay",
	--"agent participles",
	-- Articles and determiners are kept apart in this module ("artikulo" vs "pantukoy"), and data.nonlemmas has
	-- a dedicated "anyong artikulo" entry, so article forms must not be folded into determiner forms.
	["article form"] = "anyong artikulo",
	["circumfix form"] = "anyong kabilaang lapi",
	--"combined form",
	["comparative adjective form"] = "anyong pang-uring pahambing",
	["comparative adjectives"] = "pang-uring pahambing",
	["comparative adverb form"] = "anyong pang-abay na pahambing",
	["comparative adverbs"] = "pang-abay na pahambing",
	["conjunction form"] = "anyong pangatnig",
	["contractions"] = "may-angkop",
	--"converbs",
	["determiner comparative form"] = "anyong pantukoy na pahambing",
	["determiner form"] = "anyong pantukoy",
	["determiner superlative form"] = "anyong pantukoy na pasukdol",
	["diminutive nouns"] = "pangngalang paliit",
	["elative adjectives"] = "pang-uring palamang",
	["equative adjective form"] = "anyong pang-uring magkatulad",
	["equative adjectives"] = "pang-uring magkatulad",
	["future participles"] = "pandiwaring panghinaharap",
	["gerunds"] = "herundiyo",
	["infinitive form"] = "anyong pawatas",
	["infinitives"] = "pawatas",
	["interjection form"] = "anyong pandamdam",
	--"jyutping",
	["misspellings"] = "maling baybay",
	["negative participles"] = "pandiwaring negatibo",
	["nominal participles"] = "pandiwaring makangalan",
	["noun case form"] = "anyong pangngalang kaukulan",
	--"noun construct form",
	["noun dual form"] = "anyong pangngalang dalwahan",
	["noun form"] = "anyong pangngalan",
	["noun paucal form"] = "anyong pangngalang pangkaunti",
	["noun plural form"] = "anyong pangngalang maramihan",
	["noun possessive form"] = "anyong pangngalang paari",
	["noun singulative form"] = "anyong pangngalang isahan",
	["numeral form"] = "anyong pambilang",
	["participles"] = "pandiwari",
	["participle form"] = "anyong pandiwari",
	["particle form"] = "anyong kataga",
	["passive participles"] = "pandiwaring balintiyak",
	["past active participles"] = "pandiwaring pangnagdaang tahasan",
	["past adverbial participles"] = "pandiwaring pangnagdaang malapang-abay",
	["past participles"] = "pandiwaring pangnagdaan",
	["past participle form"] = "anyong pandiwaring pangnagdaan",
	["past passive participles"] = "anyong pandiwaring pangnagdaang balintiyak",
	["perfect active participles"] = "pandiwaring panggitnang tahasan",
	["perfect participles"] = "pandiwaring panggitna",
	["perfect passive participles"] = "pandiwaring panggitnang balintiyak",
	--"pinyin",
	["plurals"] = "maramihan",
	--"postposition form",
	["prefix form"] = "anyong unlapi",
	["preposition contractions"] = "pang-ukol na tinipil",
	["preposition form"] = "anyong pang-ukol",
	["prepositional pronouns"] = "panghalip na malapang-ukol",
	["present active participles"] = "pandiwaring pangkasalukuyang tahasan",
	["present adverbial participles"] = "pandiwaring pangkasalukuyang malapang-abay",
	["present participles"] = "anyong pandiwaring pangkasalukuyan",
	["present passive participles"] = "pandiwaring pangkasalukuyang balintiyak",
	--"preverb form",
	["pronoun form"] = "anyong panghalip",
	["pronoun possessive form"] = "anyong panghalip paari",
	["proper noun form"] = "anyong pangngalang pantangi",
	["proper noun plural form"] = "anyong pangngalang pantanging maramihan",
	--"rafsi",
	["romanizations"] = "romanisasyon",
	["root form"] = "anyong ugat",
	["singulatives"] = "isahan",
	["suffix form"] = "anyong hulapi",
	["superlative adjective form"] = "anyong pang-uring pasukdol",
	["superlative adjectives"] = "pang-uring pasukdol",
	["superlative adverb form"] = "anyong pang-abay na pasukdol",
	["superlative adverbs"] = "pang-abay na pasukdol",
	["verb form"] = "anyong pandiwa",
	["verbal nouns"] = "pangngalang makadiwa",
}

--[==[ var:
Canonicalization map for the parts of speech that get gender/number categories such as `German masculine nouns` or
`Russian imperfective verbs` when the headword has the appropriate gender/number. The value is the part of speech
used in the category name; in particular, proper nouns are categorized as nouns.
]==]
data.pos_for_gender_number_cat = {
	pangngalan = "pangngalan",
	["pangngalang pantangi"] = "pangngalan",
	hulapi = "hulapi",
	-- Verbs are listed because impf and pf are valid "genders".
	pandiwa = "pandiwa",
}

--[==[ var:
Per-language lower limit for a term to count as a "long" word, keyed by language code. Used to categorize terms into
e.g. [[:Category:Long English words]] automatically. A language without an entry here is never categorized this way.
]==]
data.long_word_thresholds = {
	af = 20,
	bg = 20,
	cy = 25,
	de = 20,
	en = 25,
	es = 20,
	fr = 20,
	ka = 20,
	sv = 20,
	tl = 25,
}

------ 3. Page-wide processing (so that it only needs to be done once per page). ------
local page = require(headword_page_module).process_page()
data.page = page
-- Mirror the page name fields onto `data` itself so callers need not go through `data.page`.
data.pagename = page.pagename
data.encoded_pagename = page.encoded_pagename

return data