Module:Lang/data: Difference between revisions

From Vigyanwiki
< Module:Lang
Template>Trappist the monk
(+mla; - linguist list codes because not in IANA registry;)
Template>Trappist the monk
No edit summary
Line 6: Line 6:
Language codes and names in this table override the BCP47 names in Module:Language/name/data.
Language codes and names in this table override the BCP47 names in Module:Language/name/data.


This table may also be used to add 'codes' that aren't BCP47
code indexes in this table shall always be lower case
 
Linguist list identifier codes may be found at http://multitree.org/codes/
Additional Linguist list identifier codes may require tweaks to get_ietf_parts() in Module:Lang
 
]]
]]


local override = {
local override = {
-- ISO 639-1 codes
-- ISO 639-1 codes
["de-AT"] = {"Austrian German"}, -- these code-region and code-variant to match en.wiki article names
["de-at"] = {"Austrian German"}, -- these code-region and code-variant to match en.wiki article names
["de-CH"] = {"Swiss German"},
["de-ch"] = {"Swiss German"},
["en-AU"] = {"Australian English"},
["en-au"] = {"Australian English"},
["en-CA"] = {"Canadian English"},
["en-ca"] = {"Canadian English"},
["en-emodeng"] = {"Early Modern English"},
["en-emodeng"] = {"Early Modern English"},
["en-GB"] = {"British English"},
["en-gb"] = {"British English"},
["en-IE"] = {"Irish English"},
["en-ie"] = {"Irish English"},
["en-NZ"] = {"New Zealand English"},
["en-nz"] = {"New Zealand English"},
["en-US"] = {"American English"},
["en-us"] = {"American English"},
["en-ZA"] = {"South African English"},
["en-za"] = {"South African English"},
['fy'] = {'West Frisian'}, -- IANA name is Western Frisian
['fy'] = {'West Frisian'}, -- IANA name is Western Frisian
['ps'] = {'Pashto'}, -- IANA name is Pushto


-- ISO 639-2, -3 codes
-- ISO 639-2, -3 codes
Line 38: Line 35:
['mla'] = {'Tamambo'}, -- wp_languages name is Medieval Latin; IANA and ISO 639-1 name is Malo
['mla'] = {'Tamambo'}, -- wp_languages name is Medieval Latin; IANA and ISO 639-1 name is Malo
['nrf'] = {'Norman'}, -- not quite a collective - IANA name: Jèrriais; categorizes to Norman-language text
['nrf'] = {'Norman'}, -- not quite a collective - IANA name: Jèrriais; categorizes to Norman-language text
['ps'] = {'Pashto'}, -- IANA name is Pushto
['roa'] = {'Romance'}, -- collective; overrides roa (Jèrriais) in the wp language table; IANA name is "Romance languages"
['roa'] = {'Romance'}, -- collective; overrides roa (Jèrriais) in the wp language table; IANA name is "Romance languages"
['sem'] = {'other Semitic'}, -- ISO 639-2 collective; this text used to fit pre-existing: Category:Articles containing other Semitic-language text
['sem'] = {'other Semitic'}, -- ISO 639-2 collective; this text used to fit pre-existing: Category:Articles containing other Semitic-language text
Line 45: Line 41:
['und'] = {'undetermined'}, -- capitalization to match existing category
['und'] = {'undetermined'}, -- capitalization to match existing category
['wrg'] = {'Warrongo'}, -- IANA name is Warungu
['wrg'] = {'Warrongo'}, -- IANA name is Warungu
["xal-RU"] = {"Kalmyk"}, -- to match en.wiki article title
["xal-ru"] = {"Kalmyk"}, -- to match en.wiki article title
}
}



Revision as of 18:54, 22 December 2017

Documentation for this module may be created at Module:Lang/data/doc

-- NOTE(review): lang_data is not referenced again in this file; the module returns a
-- separately constructed table at the end.  Presumably a leftover -- confirm before removing.
local lang_data = {};


--[[--------------------------< O V E R R I D E >--------------------------------------------------------------

Language codes and names in this table override the BCP47 names in Module:Language/name/data.

code indexes in this table shall always be lower case
]]

local override = {
	-- ISO 639-1 codes.  The code-region and code-variant tags are named to match en.wiki article names.
	['de-at'] = {'Austrian German'},
	['de-ch'] = {'Swiss German'},
	['en-au'] = {'Australian English'},
	['en-ca'] = {'Canadian English'},
	['en-emodeng'] = {'Early Modern English'},
	['en-gb'] = {'British English'},
	['en-ie'] = {'Irish English'},
	['en-nz'] = {'New Zealand English'},
	['en-us'] = {'American English'},
	['en-za'] = {'South African English'},
	fy = {'West Frisian'},				-- IANA name: Western Frisian
	ps = {'Pashto'},					-- IANA name: Pushto

	-- ISO 639-2, -3 codes
	arc = {'Aramaic'},					-- matches the en.wiki article title
	chu = {'Church Slavonic'},			-- matches the en.wiki article title
	fan = {'Fang'},						-- IANA name: Fang (Equatorial Guinea); disambiguated from fak: Fang (Cameroon)
	frr = {'North Frisian'},			-- IANA name: Northern Frisian
	frs = {'East Frisian Low Saxon'},	-- IANA name: Eastern Frisian
	jam = {'Jamaican Patois'},			-- IANA name: Jamaican Creole English
	mhr = {'Meadow Mari'},				-- IANA name: Eastern Mari
	mid = {'Modern Mandaic'},			-- IANA name: Mandaic
	mla = {'Tamambo'},					-- wp_languages name is Medieval Latin; IANA name is Malo
	nrf = {'Norman'},					-- not quite a collective; IANA name: Jèrriais; categorizes to Norman-language text
	roa = {'Romance'},					-- collective; overrides roa (Jèrriais) in the wp language table; IANA name: "Romance languages"
	sem = {'other Semitic'},			-- ISO 639-2 collective; wording fits pre-existing Category:Articles containing other Semitic-language text
	stq = {'Saterland Frisian'},		-- IANA name: Saterfriesisch
	tmr = {'Jewish Babylonian Aramaic'},	-- ISO 639-3 name: Jewish Babylonian Aramaic (ca. 200-1200 CE)
	und = {'undetermined'},				-- capitalization matches the existing category
	wrg = {'Warrongo'},					-- IANA name: Warungu
	['xal-ru'] = {'Kalmyk'},			-- matches the en.wiki article title
}


--[=[-------------------------< R T L _ S C R I P T S >--------------------------------------------------------

ISO 15924 scripts that are written right-to-left.  Data in this table taken from [[ISO 15924#List of codes]]

]=]

-- Lower-case ISO 15924 script codes for right-to-left scripts, in alphabetical order
-- (one row per initial letter).
local rtl_scripts = {
	"adlm", "arab", "aran", "armi", "avst",
	"cprt",
	"egyd", "egyh",
	"hatr", "hebr", "hung",
	"inds",
	"khar",
	"lydi",
	"mand", "mani", "mend", "merc", "mero",
	"narb", "nbat", "nkoo",
	"orkh",
	"palm", "phli", "phlp", "phlv", "phnx", "prti",
	"samr", "sarb", "syrc", "syre", "syrj", "syrn",
	"thaa",
	"wole",
}

-- Set form of rtl_scripts, produced by listToSet() in Module:Table.
-- NOTE(review): presumably maps each script code to true for direct lookup -- confirm against Module:Table.
local table_utils = require ("Module:Table");
local is_rtl_script = table_utils.listToSet (rtl_scripts);


--[[--------------------------< T R A N S L I T   T I T L E S >------------------------------------------------

This is a table of tables of transliteration standards and the language codes or language scripts that apply to
those standards.  This table is used to create the tool-tip text associated with the transliterated text displayed
by some of the {{lang-??}} templates.

These tables are more-or-less copied directly from {{transl}}.  The standard 'NO_STD' is a construct to allow for
the cases when no |std= parameter value is provided.

]]

-- Tool-tip titles for transliterated text.  The outer key is the transliteration standard;
-- the inner key is a language code (lower case) or an ISO 15924 script code (title case).
-- The inner key 'default' holds the title for a standard when no more specific
-- language or script entry exists.  'NO_STD' holds the entries that apply when no standard is given.
local translit_title_table = {
	['ISO'] = {																	-- when a transliteration standard is supplied
		['ab'] = 'ISO 9 Cyrillic',
		['ba'] = 'ISO 9 Cyrillic',
		['be'] = 'ISO 9 Cyrillic',
		['bg'] = 'ISO 9 Cyrillic',
		['kk'] = 'ISO 9 Cyrillic',
		['ky'] = 'ISO 9 Cyrillic',
		['mn'] = 'ISO 9 Cyrillic',
		['ru'] = 'ISO 9 Cyrillic',
		['tg'] = 'ISO 9 Cyrillic',
		['uk'] = 'ISO 9 Cyrillic',
		['bua'] = 'ISO 9 Cyrillic',
		['sah'] = 'ISO 9 Cyrillic',
		['tut'] = 'ISO 9 Cyrillic',
		['xal'] = 'ISO 9 Cyrillic',
		['Cyrl'] = 'ISO 9 Cyrillic',

		['ar'] = 'ISO 233 Arabic',
		['ku'] = 'ISO 233 Arabic',
		['ps'] = 'ISO 233 Arabic',
		['ug'] = 'ISO 233 Arabic',
		['ur'] = 'ISO 233 Arabic',
		['Arab'] = 'ISO 233 Arabic',

		['he'] = 'ISO 259 Hebrew',
		['yi'] = 'ISO 259 Hebrew',
		['Hebr'] = 'ISO 259 Hebrew',

		['el'] = 'ISO 843 Greek',
		['grc'] = 'ISO 843 Greek',

		['ja'] = 'ISO 3602 Japanese',
		['Hira'] = 'ISO 3602 Japanese',
		['Hrkt'] = 'ISO 3602 Japanese',
		['Jpan'] = 'ISO 3602 Japanese',
		['Kana'] = 'ISO 3602 Japanese',

		['zh'] = 'ISO 7098 Chinese',
		['chi'] = 'ISO 7098 Chinese',
		['pny'] = 'ISO 7098 Chinese',
		['zho'] = 'ISO 7098 Chinese',
		['Han'] = 'ISO 7098 Chinese',											-- not an ISO 15924 code (Unicode alias of Hani); kept so existing uses keep working
		['Hani'] = 'ISO 7098 Chinese',											-- the ISO 15924 code for Han
		['Hans'] = 'ISO 7098 Chinese',
		['Hant'] = 'ISO 7098 Chinese',

		['ka'] = 'ISO 9984 Georgian',
		['kat'] = 'ISO 9984 Georgian',

		['arm'] = 'ISO 9985 Armenian',
		['hy'] = 'ISO 9985 Armenian',

		['th'] = 'ISO 11940 Thai',
		['tha'] = 'ISO 11940 Thai',

		['ko'] = 'ISO 11941 Korean',
		['kor'] = 'ISO 11941 Korean',

		['bn'] = 'ISO 15919 Indic',
		['dra'] = 'ISO 15919 Indic',
		['gu'] = 'ISO 15919 Indic',
		['hi'] = 'ISO 15919 Indic',
		['inc'] = 'ISO 15919 Indic',
		['kn'] = 'ISO 15919 Indic',
		['ml'] = 'ISO 15919 Indic',
		['mr'] = 'ISO 15919 Indic',
		['sa'] = 'ISO 15919 Indic',
		['ta'] = 'ISO 15919 Indic',
		['te'] = 'ISO 15919 Indic',
		['Beng'] = 'ISO 15919 Indic',
		['Brah'] = 'ISO 15919 Indic',
		['Deva'] = 'ISO 15919 Indic',

		['default'] = 'ISO transliteration',
		},

	['DIN'] = {
		['ar'] = 'DIN 31635 Arabic',
		['fa'] = 'DIN 31635 Arabic',
		['ku'] = 'DIN 31635 Arabic',
		['ps'] = 'DIN 31635 Arabic',
		['tg'] = 'DIN 31635 Arabic',
		['ug'] = 'DIN 31635 Arabic',
		['ur'] = 'DIN 31635 Arabic',
		['Arab'] = 'DIN 31635 Arabic',

		['default'] = 'DIN transliteration',
		},

	['IAST'] = {
		['default'] = 'International Alphabet of Sanskrit Transliteration',
		},

	['ALA'] = {																	-- no leading space: this text is used verbatim as the title
		['default'] = 'American Library Association – Library of Congress transliteration',
		},

	['ALA-LC'] = {																-- same title as 'ALA'
		['default'] = 'American Library Association – Library of Congress transliteration',
		},

	['NO_STD'] = {																-- when no transliteration standard is supplied
		['akk'] = 'Semitic transliteration',
		['sem'] = 'Semitic transliteration',
		['Phnx'] = 'Semitic transliteration',
		['Xsux'] = 'Cuneiform transliteration',
		},
	};


-- Public interface of this data module: each field exposes the like-named local table.
local exports = {};
exports.override = override;
exports.rtl_scripts = rtl_scripts;
exports.is_rtl_script = is_rtl_script;
exports.translit_title_table = translit_title_table;

return exports;