Module:headword/data
Itsura
- The following documentation is located at Module:headword/data/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
This module contains data used by Module:headword and Module:headword/templates.
Each list in section 1 is converted into a set by list_to_set at the point where it is defined, so that the resulting table contains [item1] = true, [item2] = true, ....
local headword_page_module = "Module:headword/page"
local list_to_set = require("Module:table").listToSet
local data = {}
------ 1. Lists which are converted into sets. ------
--[==[ var:
Set of large pages on which label tracking, red link checking and similar work is disabled.
]==]
data.large_pages = list_to_set({
	-- pages that consistently hit timeouts
	"a",
	-- pages that sometimes hit timeouts
	"A", "baba", "de", "e", "i", "lima", "o", "u",
	"и",
	"山", "子", "月", "一", "人",
})
--[==[ var:
Map from singular to plural, and from plural to itself, for recognized parts of speech with irregular plurals. Most
entries are invariable plurals (e.g. `kanji` is its own plural); the one non-invariable entry is `mora`, whose plural
is `morae`.
]==]
data.irregular_plurals = list_to_set(
	{
		"cmavo", "cmene", "fu'ivla", "gismu", "Han tu", "hanja", "hanzi",
		"jyutping", "kana", "kanji", "lujvo", "phrasebook", "pinyin", "rafsi",
	},
	-- Returns the item itself; presumably list_to_set stores this as the value for the item's key, which makes each
	-- invariable term map to itself (confirm against Module:table).
	function(_, term)
		return term
	end
)
local irregular_plurals = data.irregular_plurals
-- Irregular non-zero plurals AND any regular plurals where the singular ends in "s", because the module assumes
-- that inputs ending in "s" are plurals. Both the singular and the plural are registered as keys, as the module
-- will generate a default plural if the input doesn't match a key in this table.
for singular, plural in pairs({ mora = "morae" }) do
	irregular_plurals[singular] = plural
	irregular_plurals[plural] = plural
end
--[==[ var:
Recognized lemmas. If the part of speech in {{tl|head}} is set to one of these or its singular equivalent, the category
'LANG lemmas' will automatically be added. If the part of speech is not a singular or plural lemma or non-lemma form and
is not an abbreviation that expands to a recognized lemma or non-lemma form, the page will be added to various tracking
categories:
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/LANG]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS/LANG]]

The trailing comment on an entry gives the English part-of-speech name it stands for; entries without a comment are
kept under their English (or language-specific) name.
]==]
data.lemmas = list_to_set({
	"daglat",                    -- abbreviations
	"akronim",                   -- acronyms
	"pang-uri",                  -- adjectives
	"adnominal",                 -- adnominals
	"adpositions",
	"pang-abay",                 -- adverbs
	"panlapi",                   -- affixes
	"ambipositions",
	-- Because Tagalog does not distinguish articles from determiners, determiners use "pantukoy" while articles
	-- use "artikulo".
	"artikulo",                  -- articles
	"kabilaang lapi",            -- circumfixes
	"circumpositions",
	"classifiers",
	"cmavo",
	"cmavo clusters",
	"cmene",
	"combining forms",
	"pangatnig",                 -- conjunctions
	"counters",
	"pantukoy",                  -- determiners
	"tuldik",                    -- diacritical marks
	"digrapo",                   -- digraphs
	"pang-uring magkatulad",     -- equative adjectives
	"fu'ivla",
	"gismu",
	"titik-Tsino",               -- Han characters
	"Han tu",
	"hanja",
	"hanzi",
	"ideophones",
	"sawikain",                  -- idioms
	"gitlapi",                   -- infixes
	"inisyals",                  -- initialisms
	"iteration marks",
	"interfixes",
	"pandamdam",                 -- interjections
	"kana",
	"kanji",
	"titik",                     -- letters
	"pang-angkop",               -- ligatures
	"logogramo",                 -- logograms
	"lujvo",
	"morae",
	"morpema",                   -- morphemes
	"non-constituents",
	"pangngalan",                -- nouns
	"bilang",                    -- numbers
	"simbolong pambilang",       -- numeral symbols
	"pambilang",                 -- numerals
	"kataga",                    -- particles
	"parirala",                  -- phrases
	"postpositions",
	"postpositional phrases",
	"predicatives",
	"unlapi",                    -- prefixes
	"malapang-ukol na parirala", -- prepositional phrases
	"pang-ukol",                 -- prepositions
	"preverbs",
	"pang-abay na makahalip",    -- pronominal adverbs
	"panghalip",                 -- pronouns
	"pangngalang pantangi",      -- proper nouns
	"salawikain",                -- proverbs
	"bantas",                    -- punctuation marks
	"pamanggit",                 -- relatives
	"ugat",                      -- roots
	"tangkay",                   -- stems
	"hulapi",                    -- suffixes
	"pantig",                    -- syllables
	"simbolo",                   -- symbols
	"pandiwa",                   -- verbs
})
--[==[ var:
Recognized non-lemma forms. If the part of speech in {{tl|head}} is set to one of these or its singular equivalent, the
category 'LANG non-lemma forms' will automatically be added. If the part of speech is not a singular or plural lemma or
non-lemma form and is not an abbreviation that expands to a recognized lemma or non-lemma form, the page will be added
to various tracking categories; see the documentation of `data.lemmas`.

The trailing comment on an entry gives the English part-of-speech name it stands for; entries without a comment are
kept under their English (or language-specific) name.
]==]
data.nonlemmas = list_to_set({
	"anyong pandiwaring tahasan",                 -- active participle forms
	"pandiwaring tahasan",                        -- active participles
	"pandiwaring malapang-uri",                   -- adjectival participles
	"anyong pang-uring kaukulan",                 -- adjective case forms
	"anyong pang-uri",                            -- adjective forms
	"anyong pang-uring pambabae",                 -- adjective feminine forms
	"anyong pang-uring maramihan",                -- adjective plural forms
	"anyong pang-abay",                           -- adverb forms
	"pandiwaring malapang-abay",                  -- adverbial participles
	"agent participles",
	"anyong artikulo",                            -- article forms
	"anyong kabilaang lapi",                      -- circumfix forms
	"combined forms",
	"anyong pang-uring pahambing",                -- comparative adjective forms
	"pang-uring pahambing",                       -- comparative adjectives
	"anyong pang-abay na pahambing",              -- comparative adverb forms
	"pang-abay na pahambing",                     -- comparative adverbs
	"anyong pangatnig",                           -- conjunction forms
	"may-angkop",                                 -- contractions
	"converbs",
	"anyong pantukoy na pahambing",               -- determiner comparative forms
	"anyong pantukoy",                            -- determiner forms
	"anyong pantukoy na pasukdol",                -- determiner superlative forms
	"pangngalang paliit",                         -- diminutive nouns
	"pang-uring palamang",                        -- elative adjectives
	"anyong pang-uring magkatulad",               -- equative adjective forms
	"pang-uring magkatulad",                      -- equative adjectives
	"pandiwaring panghinaharap",                  -- future participles
	"herundiyo",                                  -- gerunds
	"anyong pawatas",                             -- infinitive forms
	"pawatas",                                    -- infinitives
	"anyong pandamdam",                           -- interjection forms
	"jyutping",
	"maling baybay",                              -- misspellings
	"pandiwaring negatibo",                       -- negative participles
	"pandiwaring makangalan",                     -- nominal participles
	"anyong pangngalang kaukulan",                -- noun case forms
	"noun construct forms",
	"anyong pangngalang dalwahan",                -- noun dual forms
	"anyong pangngalan",                          -- noun forms
	"anyong pangngalang pangkaunti",              -- noun paucal forms
	"anyong pangngalang maramihan",               -- noun plural forms
	"anyong pangngalang paari",                   -- noun possessive forms
	"anyong pangngalang isahan",                  -- noun singulative forms
	"anyong pambilang",                           -- numeral forms
	"pandiwari",                                  -- participles
	"anyong pandiwari",                           -- participle forms
	"anyong kataga",                              -- particle forms
	"pandiwaring balintiyak",                     -- passive participles
	"pandiwaring pangnagdaang tahasan",           -- past active participles
	"pandiwaring pangnagdaang malapang-abay",     -- past adverbial participles
	"pandiwaring pangnagdaan",                    -- past participles
	"anyong pandiwaring pangnagdaan",             -- past participle forms
	-- NOTE(review): unlike its sibling participle entries, this one carries the "anyong" prefix used by the
	-- "... forms" entries; confirm that this is intended.
	"anyong pandiwaring pangnagdaang balintiyak", -- past passive participles
	"pandiwaring panggitnang tahasan",            -- perfect active participles
	"pandiwaring panggitna",                      -- perfect participles
	"pandiwaring panggitnang balintiyak",         -- perfect passive participles
	"pinyin",
	"maramihan",                                  -- plurals
	"postposition forms",
	"anyong unlapi",                              -- prefix forms
	"pang-ukol na tinipil",                       -- preposition contractions
	"anyong pang-ukol",                           -- preposition forms
	"panghalip na malapang-ukol",                 -- prepositional pronouns
	"pandiwaring pangkasalukuyang tahasan",       -- present active participles
	"pandiwaring pangkasalukuyang malapang-abay", -- present adverbial participles
	-- NOTE(review): unlike its sibling participle entries, this one carries the "anyong" prefix used by the
	-- "... forms" entries; confirm that this is intended.
	"anyong pandiwaring pangkasalukuyan",         -- present participles
	"pandiwaring pangkasalukuyang balintiyak",    -- present passive participles
	"preverb forms",
	"anyong panghalip",                           -- pronoun forms
	"anyong panghalip paari",                     -- pronoun possessive forms
	"anyong pangngalang pantangi",                -- proper noun forms
	"anyong pangngalang pantanging maramihan",    -- proper noun plural forms
	"rafsi",
	"romanisasyon",                               -- romanizations
	"anyong ugat",                                -- root forms
	"isahan",                                     -- singulatives
	"anyong hulapi",                              -- suffix forms
	"anyong pang-uring pasukdol",                 -- superlative adjective forms
	"pang-uring pasukdol",                        -- superlative adjectives
	"anyong pang-abay na pasukdol",               -- superlative adverb forms
	"pang-abay na pasukdol",                      -- superlative adverbs
	"anyong pandiwa",                             -- verb forms
	"pangngalang makadiwa",                       -- verbal nouns
})
--[==[ var:
Set of language codes whose headwords will not get links to the separate parts of the headword.
]==]
data.no_multiword_links = list_to_set({ "zh" })
--[==[ var:
Set of language codes that will not have `LANG multiword terms` categories added. A language is listed for one of
these reasons: (a) words are written without spaces between them; (b) syllables are written with spaces between them;
(c) variant reconstructions are notated with a tilde surrounded by spaces; (d) the language is a sign language, where
pagenames are multiword descriptions of the gesture(s) required to make an individual sign; (e) some other weirdnesses.
]==]
data.no_multiword_cat = list_to_set({
	-------- Languages without spaces between words (sometimes spaces between phrases) --------
	"blt",         -- Tai Dam
	"ja",          -- Japanese
	"khb",         -- Lü
	"km",          -- Khmer
	"lo",          -- Lao
	"mnw",         -- Mon
	"my",          -- Burmese
	"nan",         -- Min Nan (some words in Latin script; hyphens between syllables)
	"nan-hbl",     -- Hokkien (some words in Latin script; hyphens between syllables)
	"nod",         -- Northern Thai
	"ojp",         -- Old Japanese
	"shn",         -- Shan
	"sou",         -- Southern Thai
	"tdd",         -- Tai Nüa
	"th",          -- Thai
	"tts",         -- Isan
	"twh",         -- Tai Dón
	"txg",         -- Tangut
	"zh",          -- Chinese (all varieties with Chinese characters)
	"zkt",         -- Khitan

	-------- Languages with spaces between syllables --------
	"ahk",         -- Akha
	"aou",         -- A'ou
	"atb",         -- Zaiwa
	"byk",         -- Biao
	"cdy",         -- Chadong
	--"duu", -- Drung; not sure
	--"hmx-pro", -- Proto-Hmong-Mien
	--"hnj", -- Green Hmong; not sure
	"huq",         -- Tsat
	"ium",         -- Iu Mien
	--"lis", -- Lisu; not sure
	"mtq",         -- Muong
	--"mww", -- White Hmong; not sure
	"onb",         -- Lingao
	--"sit-gkh", -- Gokhy; not sure
	--"swi", -- Sui; not sure
	"tbq-lol-pro", -- Proto-Loloish
	"tdh",         -- Thulung
	"ukk",         -- Muak Sa-aak
	"vi",          -- Vietnamese
	"yig",         -- Wusa Nasu
	"zng",         -- Mang

	-------- Languages with ~ with surrounding spaces used to separate variants --------
	"mkh-ban-pro", -- Proto-Bahnaric
	"sit-pro",     -- Proto-Sino-Tibetan

	-------- Other weirdnesses --------
	"mul",         -- Translingual; gestures, Morse code, etc.
	"aot",         -- Atong (India); bullet is a letter

	-------- All sign languages --------
	"ads", "aed", "aen", "afg", "ase", "asf", "asp", "asq", "asw", "bfi",
	"bfk", "bog", "bqn", "bqy", "bvl", "bzs", "cds", "csc", "csd", "cse",
	"csf", "csg", "csl", "csn", "csq", "csr", "doq", "dse", "dsl", "ecs",
	"esl", "esn", "eso", "eth", "fcs", "fse", "fsl", "fss", "gds", "gse",
	"gsg", "gsm", "gss", "gus", "hab", "haf", "hds", "hks", "hos", "hps",
	"hsh", "hsl", "icl", "iks", "ils", "inl", "ins", "ise", "isg", "isr",
	"jcs", "jhs", "jls", "jos", "jsl", "jus", "kgi", "kvk", "lbs", "lls",
	"lsl", "lso", "lsp", "lst", "lsy", "lws", "mdl", "mfs", "mre", "msd",
	"msr", "mzc", "mzg", "mzy", "nbs", "ncs", "nsi", "nsl", "nsp", "nsr",
	"nzs", "okl", "pgz", "pks", "prl", "prz", "psc", "psd", "psg", "psl",
	"pso", "psp", "psr", "pys", "rms", "rsl", "rsm", "sdl", "sfb", "sfs",
	"sgg", "sgx", "slf", "sls", "sqk", "sqs", "ssp", "ssr", "svk", "swl",
	"syy", "tse", "tsm", "tsq", "tss", "tsy", "tza", "ugn", "ugy", "ukl",
	"uks", "vgt", "vsi", "vsl", "vsv", "xki", "xml", "xms", "ygs", "ysl",
	"zib", "zsl",
})
--[==[ var:
Set of language codes for which a hyphen is not considered a word separator for the `LANG multiword terms` category.
The reasons vary, so each entry should state why the language is included.
]==]
data.hyphen_not_multiword_sep = list_to_set({
	"akk",         -- Akkadian; hyphens between syllables
	"akl",         -- Aklanon; hyphens for mid-word glottal stops
	"ber-pro",     -- Proto-Berber; morphemes separated by hyphens
	"ceb",         -- Cebuano; hyphens for mid-word glottal stops
	"cnk",         -- Khumi Chin; hyphens used in single words
	"cpi",         -- Chinese Pidgin English; Chinese-derived words with hyphens between syllables
	"de",          -- German; too many false positives
	"esx-esk-pro", -- hyphen used to separate morphemes
	"fi",          -- Finnish; hyphen used to separate components in compound words if the final and initial vowels match, respectively
	"gd",          -- Scottish Gaelic; too many false positives like [[a-chianaibh]], [[a-nìos]], [[an-dè]] and other adverbs in a- and an-
	"hil",         -- Hiligaynon; hyphens for mid-word glottal stops
	"hnn",         -- Hanunoo; too many false positives
	"ilo",         -- Ilocano; hyphens for mid-word glottal stops
	"kne",         -- Kankanaey; hyphens for mid-word glottal stops
	"lcp",         -- Western Lawa; dash as syllable joiner
	"lwl",         -- Eastern Lawa; dash as syllable joiner
	"mfa",         -- Pattani Malay in Thai script; dash as syllable joiner
	"mkh-vie-pro", -- Proto-Vietic; morphemes separated by hyphens
	"msb",         -- Masbatenyo; too many false positives
	"tl",          -- Tagalog; too many false positives
	"war",         -- Waray-Waray; too many false positives
	"yo",          -- Yoruba; hyphens used to show lengthened nasal vowels
})
--[==[ var:
Set of language codes that will not have `LANG masculine nouns` and similar categories added. Generally, these
languages lack gender but use the gender field for other purposes. (This is a massive hack and should be changed.)
]==]
data.no_gender_cat = list_to_set({
	-- Languages without gender but which use the gender field for other purposes
	"ja", "th",
})
--[==[ var:
Set of language codes for which [[Module:headword]] should not attempt to generate a transliteration even if the term
is written in a non-Latin script. FIXME: Notate reasons why each language is in this list.
]==]
data.notranslit = list_to_set({
	"ams", "az", "bbc", "bug", "cdo", "cia", "cjm", "cjy", "cmn", "cnp",
	"cpi", "cpx", "csp", "czh", "czo", "gan", "hak", "hnm", "hsn", "ja",
	"kzg", "lad", "ltc", "luh", "lzh", "mnp", "ms", "mul", "mvi", "nan",
	"nan-dat", "nan-hbl", "nan-hlh", "nan-lnx", "nan-tws", "nan-zhe", "nan-zsh", "och", "oj", "okn",
	"ryn", "rys", "ryu", "sh", "sjc", "tgt", "th", "tkn", "tly", "txg",
	"und", "vi", "wuu", "xug", "yoi", "yox", "yue", "za", "zh", "zhx-sic",
	"zhx-tai",
})
--[==[ var:
Set of language codes that default to `sccat` being true, i.e. categories like `LANG POS in SCRIPT script` will
automatically be generated. This can be overridden using {{para|sccat|0}} in {{tl|head}} or by setting `sccat` to
`false` in Lua.
]==]
data.default_sccat = list_to_set({
	"inc-apa", "inc-ash", "kfr", "ks",
	"mr", "mwr", "inc-oaw", "inc-ohi",
	"omr", "inc-opa", "phr", "pi",
	"pra", "sa", "skr", "sd",
})
--[==[ var:
Set of script codes for which a script-tagged display title will be added.
]==]
data.toBeTagged = list_to_set({
	"Ahom",
	"Arab", "fa-Arab", "glk-Arab", "kk-Arab", "ks-Arab", "ku-Arab", "mzn-Arab", "ms-Arab",
	"ota-Arab", "pa-Arab", "ps-Arab", "sd-Arab", "tt-Arab", "ug-Arab", "ur-Arab",
	"Armi", "Armn", "Avst", "Bali", "Bamu", "Batk",
	"Beng", "as-Beng",
	"Bopo", "Brah", "Brai", "Bugi", "Buhd", "Cakm", "Cans", "Cari", "Cham", "Cher",
	"Copt", "Cprt", "Cyrl", "Cyrs", "Deva", "Dsrt", "Egyd", "Egyp", "Ethi", "Geok",
	"Geor", "Glag", "Goth",
	"Grek", "Polyt", "polytonic",
	"Gujr", "Guru", "Hang", "Hani", "Hano", "Hebr", "Hira", "Hluw", "Ital", "Java",
	"Kali", "Kana", "Khar", "Khmr", "Knda", "Kthi", "Lana", "Laoo",
	"Latn", "Latf", "Latg", "Latnx", "Latinx", "pjt-Latn",
	"Lepc", "Limb", "Linb", "Lisu", "Lyci", "Lydi", "Mand", "Mani", "Marc", "Merc",
	"Mero", "Mlym",
	"Mong", "mnc-Mong", "sjo-Mong", "xwo-Mong",
	"Mtei", "Mymr", "Narb", "Nkoo", "Nshu", "Ogam", "Olck", "Orkh", "Orya", "Osma",
	"Ougr", "Palm", "Phag", "Phli", "Phlv", "Phnx", "Plrd", "Prti", "Rjng", "Runr",
	"Samr", "Sarb", "Saur", "Sgnw", "Shaw", "Shrd", "Sinh", "Sora", "Sund", "Sylo",
	"Syrc", "Tagb", "Tale", "Talu", "Taml", "Tang", "Tavt", "Telu", "Tfng", "Tglg",
	"Thaa", "Thai", "Tibt", "Ugar", "Vaii", "Xpeo", "Xsux", "Yiii", "Zmth", "Zsym",
	"Ipach", "Music", "Rumin",
})
--[==[ var:
Parts of speech which will not be categorised in categories like `English terms spelled with É` if the term is the
character in question (e.g. the letter entry for English [[é]]). This contrasts with entries like the French adjective
[[m̂]], which is a one-letter word spelled with the letter.

The trailing comment on an entry gives the English part-of-speech name it stands for.
]==]
data.pos_not_spelled_with_self = list_to_set({
	"tuldik",              -- diacritical marks
	"titik-Tsino",         -- Han characters
	"Han tu",
	"hanja",
	"hanzi",
	"iteration marks",
	"kana",
	"kanji",
	"titik",               -- letters
	"pang-angkop",         -- ligatures
	"logogramo",           -- logograms
	"morae",
	"simbolong pambilang", -- numeral symbols
	"pambilang",           -- numerals
	"bantas",              -- punctuation marks
	"pantig",              -- syllables
	"simbolo",             -- symbols
})
------ 2. Lists not converted into sets. ------
--[==[ var:
Recognized aliases for parts of speech (param 2=). Key is the short form and value is the canonical singular (not
pluralized) form. It is singular so the same table can be used in [[Module:form of]] for the {{para|p}}/{{para|POS}}
param and [[Module:links]] for the pos= param. Note that any part of speech, abbreviated or not, can be suffixed with
`f` to generate the corresponding non-lemma form part of speech, such as `adjf`, `af` or `adjectivef` for
`adjective form`, and `nounf` or `nf` for `noun form`. This expansion happens even when it does not make sense for the
given part of speech (e.g. `pclf` expands to `particle form` and `symf` expands to `symbol form`), and currently also,
at least in [[Module:headword]] (but not [[Module:links]]), even if the part before the `f` is not a recognized part of
speech or abbreviation (hence `nerf` expands to `ner form`).

The table has three groups of keys: short abbreviations (including the abbreviations used by diksiyonaryo.ph), English
lemma part-of-speech names redirected to their Tagalog names, and English non-lemma part-of-speech names redirected to
their Tagalog names. Values are meant to match the entries of `data.lemmas` and `data.nonlemmas`, so that an aliased
part of speech is recognized rather than tracked as unrecognized.
]==]
data.pos_aliases = {
	a = "pang-uri",
	adj = "pang-uri",
	pnr = "pang-uri", -- diksiyonaryo.ph abbr
	adv = "pang-abay",
	pnb = "pang-abay", -- diksiyonaryo.ph abbr
	art = "artikulo",
	det = "pantukoy",
	pnt = "pantukoy", -- diksiyonaryo.ph abbr
	compadj = "pang-uring pahambing",
	compadv = "pang-abay na pahambing",
	conj = "pangatnig",
	conv = "converb",
	int = "pandamdam",
	interj = "pandamdam",
	intj = "pandamdam",
	pdd = "pandamdam", -- diksiyonaryo.ph abbr
	n = "pangngalan",
	png = "pangngalan", -- diksiyonaryo.ph abbr
	-- the next two support Algonquian languages; see also vii/vai/vti/vta below
	na = "animate noun",
	ni = "inanimate noun",
	num = "pambilang",
	part = "pandiwari",
	pcl = "kataga",
	phr = "parirala",
	pn = "pangngalang pantangi",
	postp = "postposition",
	pref = "unlapi",
	prep = "pang-ukol",
	prepphr = "malapang-ukol na parirala",
	-- Present participle. Uses the same name as the "present participles" entry of `data.nonlemmas` (and the
	-- ["present participles"] alias below) so that the abbreviation expands to a recognized part of speech; the
	-- previous value used the word for "past" and matched no recognized entry.
	prespart = "anyong pandiwaring pangkasalukuyan",
	pron = "panghalip",
	pnh = "panghalip", -- diksiyonaryo.ph abbr
	prop = "pangngalang pantangi",
	proper = "pangngalang pantangi",
	propn = "pangngalang pantangi",
	rom = "romanisasyon",
	romanisation = "romanisasyon",
	romanisations = "romanisasyon",
	suf = "hulapi",
	supadj = "pang-uring pasukdol",
	supadv = "pang-abay na pasukdol",
	sym = "simbolo",
	v = "pandiwa",
	vb = "pandiwa",
	pnd = "pandiwa", -- diksiyonaryo.ph abbr
	vi = "pandiwang katawanin",
	vt = "pandiwang palipat",
	-- the next four support Algonquian languages
	vii = "inanimate intransitive verb",
	vai = "animate intransitive verb",
	vti = "transitive inanimate verb",
	vta = "transitive animate verb",
	pnl = "panlapi", -- diksiyonaryo.ph abbr
	-- redirect EN to TL
	abbreviation = "daglat",
	acronym = "akronim",
	adjective = "pang-uri",
	adverb = "pang-abay",
	article = "artikulo",
	circumfix = "kabilaang lapi",
	["comparative adjective"] = "pang-uring pahambing",
	["comparative adverb"] = "pang-abay na pahambing",
	conjunction = "pangatnig",
	determiner = "pantukoy",
	["diacritical mark"] = "tuldik",
	digraph = "digrapo",
	["equative adjective"] = "pang-uring magkatulad",
	["Han character"] = "titik-Tsino",
	["idiom"] = "sawikain",
	infix = "gitlapi",
	initialism = "inisyals",
	interjection = "pandamdam",
	letter = "titik",
	logograms = "logogramo",
	morpheme = "morpema",
	noun = "pangngalan",
	number = "bilang",
	numeral = "pambilang",
	["numeral symbols"] = "simbolong pambilang",
	participle = "pandiwari",
	particle = "kataga",
	phrase = "parirala",
	["proper noun"] = "pangngalang pantangi",
	prefix = "unlapi",
	preposition = "pang-ukol",
	["prepositional phrase"] = "malapang-ukol na parirala",
	["pronominal adverbs"] = "pang-abay na makahalip",
	pronoun = "panghalip",
	proverb = "salawikain",
	["punctuation marks"] = "bantas",
	romanization = "romanisasyon",
	relatives = "pamanggit",
	root = "ugat",
	stem = "tangkay",
	suffix = "hulapi",
	["superlative adjective"] = "pang-uring pasukdol",
	["superlative adverb"] = "pang-abay na pasukdol",
	syllable = "pantig",
	symbol = "simbolo",
	verb = "pandiwa",
	["intransitive verb"] = "pandiwang katawanin",
	["transitive verb"] = "pandiwang palipat",
	["transitive and intransitive verb"] = "pandiwang palipat at katawanin",
	-- nonlemmas
	["active participle forms"] = "anyong pandiwaring tahasan",
	["active participles"] = "pandiwaring tahasan",
	["adjectival participles"] = "pandiwaring malapang-uri",
	["adjective case form"] = "anyong pang-uring kaukulan",
	["adjective form"] = "anyong pang-uri",
	["adjective feminine form"] = "anyong pang-uring pambabae",
	["adjective plural form"] = "anyong pang-uring maramihan",
	["adverb form"] = "anyong pang-abay",
	["adverbial participles"] = "pandiwaring malapang-abay",
	--"agent participles",
	-- Articles and determiners are kept apart ("artikulo" vs. "pantukoy"), and `data.nonlemmas` lists
	-- "anyong artikulo" for article forms; "anyong pantukoy" is the determiner form.
	["article form"] = "anyong artikulo",
	["circumfix form"] = "anyong kabilaang lapi",
	--"combined form",
	["comparative adjective form"] = "anyong pang-uring pahambing",
	["comparative adjectives"] = "pang-uring pahambing",
	["comparative adverb form"] = "anyong pang-abay na pahambing",
	["comparative adverbs"] = "pang-abay na pahambing",
	["conjunction form"] = "anyong pangatnig",
	["contractions"] = "may-angkop",
	--"converbs",
	["determiner comparative form"] = "anyong pantukoy na pahambing",
	["determiner form"] = "anyong pantukoy",
	["determiner superlative form"] = "anyong pantukoy na pasukdol",
	["diminutive nouns"] = "pangngalang paliit",
	["elative adjectives"] = "pang-uring palamang",
	["equative adjective form"] = "anyong pang-uring magkatulad",
	["equative adjectives"] = "pang-uring magkatulad",
	["future participles"] = "pandiwaring panghinaharap",
	["gerunds"] = "herundiyo",
	["infinitive form"] = "anyong pawatas",
	["infinitives"] = "pawatas",
	["interjection form"] = "anyong pandamdam",
	--"jyutping",
	["misspellings"] = "maling baybay",
	["negative participles"] = "pandiwaring negatibo",
	["nominal participles"] = "pandiwaring makangalan",
	["noun case form"] = "anyong pangngalang kaukulan",
	--"noun construct form",
	["noun dual form"] = "anyong pangngalang dalwahan",
	["noun form"] = "anyong pangngalan",
	["noun paucal form"] = "anyong pangngalang pangkaunti",
	["noun plural form"] = "anyong pangngalang maramihan",
	["noun possessive form"] = "anyong pangngalang paari",
	["noun singulative form"] = "anyong pangngalang isahan",
	["numeral form"] = "anyong pambilang",
	["participles"] = "pandiwari",
	["participle form"] = "anyong pandiwari",
	["particle form"] = "anyong kataga",
	["passive participles"] = "pandiwaring balintiyak",
	["past active participles"] = "pandiwaring pangnagdaang tahasan",
	["past adverbial participles"] = "pandiwaring pangnagdaang malapang-abay",
	["past participles"] = "pandiwaring pangnagdaan",
	["past participle form"] = "anyong pandiwaring pangnagdaan",
	["past passive participles"] = "anyong pandiwaring pangnagdaang balintiyak",
	["perfect active participles"] = "pandiwaring panggitnang tahasan",
	["perfect participles"] = "pandiwaring panggitna",
	["perfect passive participles"] = "pandiwaring panggitnang balintiyak",
	--"pinyin",
	["plurals"] = "maramihan",
	--"postposition form",
	["prefix form"] = "anyong unlapi",
	["preposition contractions"] = "pang-ukol na tinipil",
	["preposition form"] = "anyong pang-ukol",
	["prepositional pronouns"] = "panghalip na malapang-ukol",
	["present active participles"] = "pandiwaring pangkasalukuyang tahasan",
	["present adverbial participles"] = "pandiwaring pangkasalukuyang malapang-abay",
	["present participles"] = "anyong pandiwaring pangkasalukuyan",
	["present passive participles"] = "pandiwaring pangkasalukuyang balintiyak",
	--"preverb form",
	["pronoun form"] = "anyong panghalip",
	["pronoun possessive form"] = "anyong panghalip paari",
	["proper noun form"] = "anyong pangngalang pantangi",
	["proper noun plural form"] = "anyong pangngalang pantanging maramihan",
	--"rafsi",
	["romanizations"] = "romanisasyon",
	["root form"] = "anyong ugat",
	["singulatives"] = "isahan",
	["suffix form"] = "anyong hulapi",
	["superlative adjective form"] = "anyong pang-uring pasukdol",
	["superlative adjectives"] = "pang-uring pasukdol",
	["superlative adverb form"] = "anyong pang-abay na pasukdol",
	["superlative adverbs"] = "pang-abay na pasukdol",
	["verb form"] = "anyong pandiwa",
	["verbal nouns"] = "pangngalang makadiwa",
}
--[==[ var:
Map of parts of speech for which categories like `German masculine nouns` or `Russian imperfective verbs` will be
generated if the headword is of the appropriate gender/number. The map canonicalizes parts of speech for categorization
purposes; specifically, proper nouns ("pangngalang pantangi") categorize like nouns ("pangngalan").
]==]
data.pos_for_gender_number_cat = {
	pangngalan = "pangngalan",
	["pangngalang pantangi"] = "pangngalan",
	hulapi = "hulapi",
	-- Verbs are included because impf and pf are valid "genders".
	pandiwa = "pandiwa",
}
--[==[ var:
Lower limit for a "long" word, keyed by language code. Used to categorize terms into e.g.
[[:Category:Long English words]] automatically. Languages with no entry here do not get categorized.
]==]
data.long_word_thresholds = {
	af = 20,
	bg = 20,
	cy = 25,
	de = 20,
	en = 25,
	es = 20,
	fr = 20,
	ka = 20,
	sv = 20,
	tl = 25,
}
------ 3. Page-wide processing (so that it only needs to be done once per page). ------
-- Run the page-wide processing of the headword page module and keep its result on `data`.
local page = require(headword_page_module).process_page()
data.page = page
-- Mirror some page properties directly on `data` for ease of use.
data.pagename = page.pagename
data.encoded_pagename = page.encoded_pagename
return data