Module:Lang/data

From Vigyanwiki
< Module:Lang
Revision as of 07:35, 15 January 2018 by Template>Jonesey95 (Undid revision 820507783 by Jonesey95 (talk). did not fix the problem on the talk page, so undoing out of an abundance of caution)

Documentation for this module may be created at Module:Lang/data/doc

local lang_data = {};															-- NOTE(review): empty table that is not referenced again in this file; the module's return value is assembled separately at the end of the file


--[[--------------------------< O V E R R I D E >--------------------------------------------------------------

Language codes and names in this table override the BCP47 names in Module:Language/name/data.

code indexes in this table shall always be lower case
]]

-- Each entry maps a lower-case language tag to a one-element sequence holding the preferred language name.
-- Keys MUST be lower case: a mixed-case key cannot be matched by a lower-cased lookup.
local override = {
-- ISO 639-1 codes
	["bh"] = {"Bihari languages"},												-- only ISO 639-1 collective; defined here to override improper redefinition (Bihari) in wp_languages
	["de-at"] = {"Austrian German"},											-- these code-region and code-variant to match en.wiki article names
	["de-ch"] = {"Swiss German"},
	["en-au"] = {"Australian English"},
	["en-ca"] = {"Canadian English"},
	["en-emodeng"] = {"Early Modern English"},
	["en-gb"] = {"British English"},
	["en-ie"] = {"Irish English"},
	["en-nz"] = {"New Zealand English"},
	["en-us"] = {"American English"},
	["en-za"] = {"South African English"},
	['fy'] = {'West Frisian'},													-- IANA name is Western Frisian
	['ps'] = {'Pashto'},														-- IANA name is Pushto
	["sr-cyrl"] = {"Serbian"},													-- override wp_languages {"Serbian Cyrillic"}; to achieve this, use |label=; key is lower case per the rule for this table

-- ISO 639-2, -3 codes
	['arc'] = {'Aramaic'},														-- to match en.wiki article title
	["ber"] = {"Berber languages"},												-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	["cel"] = {"Celtic languages"},												-- ISO 639-2 collective; defined here to override improper redefinition ('Proto-Celtic') in wp_languages; use cel-x-proto instead
	["chu"] = {"Church Slavonic"},												-- to match en.wiki article title
	["egy"] = {"Ancient Egyptian"},												-- IANA name is Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic
	["fan"] = {"Fang"},															-- IANA name is Fang (Equatorial Guinea); disambiguation from fak: Fang (Cameroon)
	['frr'] = {'North Frisian'},												-- IANA name is Northern Frisian
	['frs'] = {'East Frisian Low Saxon'},										-- IANA name is Eastern Frisian
	["gem"] = {"Germanic languages"},											-- ISO 639-2 collective; defined here to override improper redefinition ('Proto-Germanic') in wp_languages; use gem-x-proto instead
	['jam'] = {'Jamaican Patois'},												-- IANA name is Jamaican Creole English
	['mhr'] = {'Meadow Mari'},													-- IANA name is Eastern Mari
	['mid'] = {'Modern Mandaic'},												-- IANA name is Mandaic
	['mla'] = {'Tamambo'},														-- wp_languages name is Medieval Latin; IANA and ISO 639-3 name is Malo
	["myn"] = {"Mayan languages"},												-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	["nah"] = {"Nahuatl languages"},											-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	['nrf'] = {'Norman'},														-- not quite a collective - IANA name: Jèrriais; categorizes to Norman-language text
	["pra"] = {"Prakrit languages"},											-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	['roa'] = {'Romance languages'},											-- ISO 639-2 collective; defined here to override improper redefinition (Jèrriais) in wp_languages; IANA name is "Romance languages"
	["sal"] = {"Salishan languages"},											-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	["sla"] = {"Slavic languages"},												-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	["son"] = {"Songhai languages"},											-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	['stq'] = {'Saterland Frisian'},											-- IANA name is Saterfriesisch
	['und'] = {'undetermined'},													-- capitalization to match existing category
	["wen"] = {"Sorbian languages"},											-- ISO 639-2 collective; defined here to override redefinition in wp_languages
	['wrg'] = {'Warrongo'},														-- IANA name is Warungu
	["xal-ru"] = {"Kalmyk"},													-- to match en.wiki article title

-- private use codes
	["cel-x-proto"] = {"Proto-Celtic"},											-- cel in IANA is Celtic languages
	["gem-x-proto"] = {"Proto-Germanic"},										-- gem in IANA is Germanic languages
	['grc-x-aeolic'] = {'Aeolic Greek'},										-- these grc-x-... codes are preferred alternates to the non-standard catchall code grc-gre
	['grc-x-attic'] = {'Attic Greek'},
	['grc-x-biblical'] = {'Biblical Greek'},
	['grc-x-byzant'] = {'Byzantine Greek'},
	['grc-x-classic'] = {'Classical Greek'},
	['grc-x-doric'] = {'Doric Greek'},
	['grc-x-hellen'] = {'Hellenistic Greek'},
	['grc-x-ionic'] = {'Ionic Greek'},
	['grc-x-koine'] = {'Koinē Greek'},
	['grc-x-medieval'] = {'Medieval Greek'},
	['grc-x-patris'] = {'Patristic Greek'},
	['yuf-x-hav'] = {'Havasupai'},												-- IANA name for these three is Havasupai-Walapai-Yavapai
	['yuf-x-wal'] = {'Walapai'},
	['yuf-x-yav'] = {'Yavapai'},
	}


--[=[-------------------------< R T L _ S C R I P T S >--------------------------------------------------------

ISO 15924 scripts that are written right-to-left.  Data in this table taken from [[ISO 15924#List of codes]]

last update to this list: 2017-12-24

]=]

-- Ordered sequence of lower-case ISO 15924 script codes for right-to-left scripts.
local rtl_scripts = {
	"adlm", "arab", "aran", "armi", "avst",
	"cprt", "egyd", "egyh", "hatr", "hebr",
	"hung", "inds", "khar", "lydi", "mand",
	"mani", "mend", "merc", "mero", "narb",
	"nbat", "nkoo", "orkh", "palm", "phli",
	"phlp", "phlv", "phnx", "prti", "rohg",
	"samr", "sarb", "sogd", "sogo", "syrc",
	"syre", "syrj", "syrn", "thaa", "wole",
}

-- Lookup table derived from rtl_scripts by Module:Table's listToSet().
-- NOTE(review): presumably keyed by script code so that is_rtl_script[code] is truthy for right-to-left scripts; confirm against Module:Table.
local table_utils = require("Module:Table")
local is_rtl_script = table_utils.listToSet(rtl_scripts)


--[[--------------------------< T R A N S L I T   T I T L E S >------------------------------------------------

This is a table of tables of transliteration standards and the language codes or language scripts that apply to
those standards.  This table is used to create the tool-tip text associated with the transliterated text displayed
by some of the {{lang-??}} templates.

These tables are more-or-less copied directly from {{transl}}.  The standard 'NO_STD' is a construct to allow for
the cases when no |std= parameter value is provided.

]]

-- Keyed first by transliteration standard, then by language or script code; the value is the tool-tip title.
-- 'NO_STD' holds the titles used when no transliteration standard is supplied.
local translit_title_table;
do
	-- Flatten a sequence of {title, {code, ...}} groups into a code -> title map;
	-- when default_title is given it is stored under the 'default' key.
	local function titles_for (groups, default_title)
		local map = {};
		for _, group in ipairs (groups) do
			local title, codes = group[1], group[2];
			for _, code in ipairs (codes) do
				map[code] = title;
			end
		end
		if default_title then
			map['default'] = default_title;
		end
		return map;
	end

	local ala_lc_title = 'American Library Association – Library of Congress transliteration';

	translit_title_table = {
		['ISO'] = titles_for ({													-- when a transliteration standard is supplied
			{'ISO 9 Cyrillic', {'ab', 'ba', 'be', 'bg', 'kk', 'ky', 'mn', 'ru', 'tg', 'uk', 'bua', 'sah', 'tut', 'xal', 'cyrl'}},
			{'ISO 233 Arabic', {'ar', 'ku', 'ps', 'ug', 'ur', 'arab'}},
			{'ISO 259 Hebrew', {'he', 'yi', 'hebr'}},
			{'ISO 843 Greek', {'el', 'grc'}},
			{'ISO 3602 Japanese', {'ja', 'hira', 'hrkt', 'jpan', 'kana'}},
			{'ISO 7098 Chinese', {'zh', 'chi', 'pny', 'zho', 'hans', 'hant'}},	-- 'han' deliberately omitted: unicode alias of Hani? doesn't belong here? should be Hani?
			{'ISO 9984 Georgian', {'ka', 'kat'}},
			{'ISO 9985 Armenian', {'arm', 'hy'}},
			{'ISO 11940 Thai', {'th', 'tha'}},
			{'ISO 11941 Korean', {'ko', 'kor'}},
			{'ISO 15919 Indic', {'bn', 'dra', 'gu', 'hi', 'inc', 'kn', 'ml', 'mr', 'sa', 'ta', 'te', 'beng', 'brah', 'deva'}},
			}, 'ISO transliteration'),

		['DIN'] = titles_for ({
			{'DIN 31635 Arabic', {'ar', 'fa', 'ku', 'ps', 'tg', 'ug', 'ur', 'arab'}},
			}, 'DIN transliteration'),

		['IAST'] = titles_for ({}, 'International Alphabet of Sanskrit Transliteration'),

		['ALA'] = titles_for ({}, ala_lc_title),

		['ALA-LC'] = titles_for ({}, ala_lc_title),

		['NO_STD'] = titles_for ({												-- when no transliteration standard is supplied; no 'default' entry
			{'Semitic transliteration', {'akk', 'sem', 'phnx'}},
			{'Cuneiform transliteration', {'xsux'}},
			}),
		};
end


-- Module exports: the override names, the right-to-left script list and its
-- derived lookup, and the transliteration tool-tip titles.
local exports = {};
exports.override = override;
exports.rtl_scripts = rtl_scripts;
exports.is_rtl_script = is_rtl_script;
exports.translit_title_table = translit_title_table;

return exports;