-- Module:Lang
-- From Vigyanwiki
-- Documentation for this module may be created at Module:Lang/doc
--[=[
Lua support for the {{lang}} and {{lang-xx}} templates and replacement of various supporting templates.
]=]
-- Module-level setup: dependencies and shared state used by every function below.
-- NOTE(review): 'Module:No globals' presumably turns accidental global access into an error - confirm against that module.
require('Module:No globals');
-- Export table; the public entry points (p.lang, p.lang_xx) are attached to it and it is returned at the end of the file.
local p = {};
-- Argument fetcher used by the entry points to read the #invoke parameters.
local getArgs = require ('Module:Arguments').getArgs;
-- Name data; this module reads its .lang, .script, .region and .variant sub-tables.
local lang_name_table = require ('Module:Language/name/data');
-- Local data; this module reads its .override and .translit_title_table sub-tables.
local lang_data = mw.loadData ('Module:Lang/data'); -- language name override and transliteration tool-tip tables
-- Namespace number of the page being rendered; make_category() compares it against 0.
local namespace = mw.title.getCurrentTitle().namespace; -- used for categorization
--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Reports whether a value counts as 'set'.

Returns true for every value other than nil and the empty string; returns false for nil and for ''.

]]

local function is_set( var )
	return var ~= nil and var ~= '';
end
--[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------

Looks for needle in the sequence haystack.

Returns the (1-based) index of the first element equal to needle, or false when needle is nil or is not found.
haystack is walked with ipairs() and is only touched when needle is not nil.

]]

local function in_array( needle, haystack )
	if needle ~= nil then
		for index, candidate in ipairs( haystack ) do
			if candidate == needle then
				return index;
			end
		end
	end
	return false;
end
--[[--------------------------< F O R M A T _ I E T F _ T A G >------------------------------------------------

Assembles an IETF language tag from its subtags using the recommended letter case for each:
	code: lower case
	script: sentence case (first letter upper case, remainder lower case)
	region: upper case
	variant: lower case

code is required; script, region and variant are skipped when they are nil or empty strings.  The subtags that
are present are joined with hyphens and the resulting tag is returned.

]]

local function format_ietf_tag (code, script, region, variant)
	local subtags = {code:lower()};

	if is_set (script) then
		local initial = script:match ('^%a'):upper();					-- upper case form of the leading letter
		local cased = script:lower():gsub ('^%a', initial, 1);			-- lower case everything, then restore the capital
		subtags[#subtags + 1] = cased;
	end
	if is_set (region) then
		subtags[#subtags + 1] = region:upper();
	end
	if is_set (variant) then
		subtags[#subtags + 1] = variant:lower();
	end

	return table.concat (subtags, '-');
end
--[[--------------------------< G E T _ I E T F _ P A R T S >--------------------------------------------------

extracts and returns IETF language tag parts:
	primary language subtag (required) - 2 or 3 character IANA language code
	script subtag - four character IANA script code
	region subtag - two-letter or three digit IANA region code
	variant subtag - four digit or 5-8 alnum variant code

in any one of these forms
	lang					lang-variant
	lang-script				lang-script-variant
	lang-region				lang-region-variant
	lang-script-region		lang-script-region-variant

each of lang, script, region, and variant, when used, must be valid

returns four values.  Valid parts are returned as themselves (the language code in lower case); omitted parts are
returned as empty strings, invalid parts are returned as nil.  Validation stops at the first invalid part, so
every part after it is also returned as nil.  A nil, empty or malformed source returns four nils.

Recognition is done in two steps for each form, in the order listed in ietf_forms: a loose 'shape' pattern
decides which form the source has, then an 'exact' pattern extracts the subtags.  When the shape matches but the
exact pattern does not (language subtag longer than three letters, over-long variant) the source is malformed.

see http://www.rfc-editor.org/rfc/bcp/bcp47.txt section 2.1

]]

local ietf_subtag_patterns = {											-- pattern fragments for each kind of subtag
	lang		= {field = 'code',		shape = '%a+',			exact = '(%a%a%a?)'},
	script		= {field = 'script',	shape = '%a%a%a%a',		exact = '(%a%a%a%a)'},
	region_a	= {field = 'region',	shape = '%a%a',			exact = '(%a%a)'},		-- two-letter region
	region_d	= {field = 'region',	shape = '%d%d%d',		exact = '(%d%d%d)'},	-- three-digit region
	variant_d	= {field = 'variant',	shape = '%d%d%d%d+',	exact = '(%d%d%d%d)'},	-- four-digit variant
	variant_a	= {																		-- 5-8 alnum variant
		field = 'variant',
		shape = '[%a%d][%a%d][%a%d][%a%d][%a%d]+',
		exact = '([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)',
		},
	};

local ietf_forms = {													-- accepted tag layouts; order matters, first matching shape wins
	{'lang', 'script', 'region_a', 'variant_d'},						-- ll-Ssss-RR-variant (where variant is 4 digits)
	{'lang', 'script', 'region_d', 'variant_d'},						-- ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)
	{'lang', 'script', 'region_a', 'variant_a'},						-- ll-Ssss-RR-variant (where variant is 5-8 alnum characters)
	{'lang', 'script', 'region_d', 'variant_a'},						-- ll-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
	{'lang', 'script', 'variant_d'},									-- ll-Ssss-variant (where variant is 4 digits)
	{'lang', 'script', 'variant_a'},									-- ll-Ssss-variant (where variant is 5-8 alnum characters)
	{'lang', 'region_a', 'variant_d'},									-- ll-RR-variant (where variant is 4 digits)
	{'lang', 'region_d', 'variant_d'},									-- ll-DDD-variant (where region is 3 digits; variant is 4 digits)
	{'lang', 'region_a', 'variant_a'},									-- ll-RR-variant (where variant is 5-8 alnum characters)
	{'lang', 'region_d', 'variant_a'},									-- ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
	{'lang', 'variant_d'},												-- ll-variant (where variant is 4 digits)
	{'lang', 'variant_a'},												-- ll-variant (where variant is 5-8 alnum characters)
	{'lang', 'script', 'region_a'},										-- ll-Ssss-RR
	{'lang', 'script', 'region_d'},										-- ll-Ssss-DDD (region is 3 digits)
	{'lang', 'script'},													-- ll-Ssss
	{'lang', 'region_a'},												-- ll-RR
	{'lang', 'region_d'},												-- ll-DDD (region is 3 digits)
	{'lang'},															-- ll
	};

local function get_ietf_parts (source)
	if not is_set (source) then
		return nil, nil, nil, nil;
	end

	local parts = {script = '', region = '', variant = ''};			-- omitted subtags stay as empty strings
	local recognized = false;

	for _, form in ipairs (ietf_forms) do
		local shape, exact = {}, {};
		for i, kind in ipairs (form) do
			shape[i] = ietf_subtag_patterns[kind].shape;
			exact[i] = ietf_subtag_patterns[kind].exact;
		end

		if source:match ('^' .. table.concat (shape, '%-') .. '$') then	-- source has this layout
			local captures = {source:match ('^' .. table.concat (exact, '%-') .. '$')};
			for i, kind in ipairs (form) do
				parts[ietf_subtag_patterns[kind].field] = captures[i];
			end
			recognized = true;
			break;
		end
	end

	if not recognized or not parts.code then							-- no layout matched, or layout matched but subtag lengths are wrong
		return nil, nil, nil, nil;										-- don't know what we got but it is malformed
	end

	local code = parts.code:lower();									-- ensure that we use and return lower case version of this
	local script, region, variant = parts.script, parts.region, parts.variant;

	if not (lang_data.override[code] or lang_name_table.lang[code]) then
		return nil, nil, nil, nil;										-- invalid language code, don't know about the others (don't care?)
	end

	if is_set (script) and not lang_name_table.script[script:lower()] then
		return code, nil, nil, nil;										-- language code ok, invalid script, don't know about the others (don't care?)
	end

	if is_set (region) and not lang_name_table.region[region:lower()] then
		return code, script, nil, nil;									-- language and script ok, invalid region
	end

	if is_set (variant) then
		local variant_data = lang_name_table.variant[variant:lower()];
		if not variant_data then
			return code, script, region, nil;							-- unknown variant
		end
		if not in_array (code, variant_data['prefixes']) then
			return code, script, region, nil;							-- variant is not registered for use with this language code
		end
	end

	return code, script, region, variant;								-- return the good bits
end
--[=[-------------------------< M A K E _ E R R O R _ M S G >--------------------------------------------------

Wraps msg in an error-styled <span>, prefixed with 'error: ', and appends the error category link.

nocat is accepted for interface compatibility but is not consulted: the check that limited categorization to
article space and honoured nocat is disabled, so the error category is always emitted.

]=]

local function make_error_msg (msg, nocat)
	local pieces = {'<span style="font-size:100%" class="error">error: '};

	pieces[#pieces + 1] = msg;
	pieces[#pieces + 1] = '</span>';
	pieces[#pieces + 1] = '[[Category:lang and lang-xx template errors]]';	-- unconditional; see header note

	return table.concat (pieces);
end
--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Builds a wikilink.  With both link and display text the result has the form [[L|D]]; with link alone the result
has the form [[L]]; when link is nil or empty the result is an empty string regardless of display.

]=]

local function make_wikilink (link, display)
	if not is_set (link) then
		return '';														-- nothing to link to
	end

	if is_set (display) then
		return '[[' .. link .. '|' .. display .. ']]';
	end

	return '[[' .. link .. ']]';
end
--[[--------------------------< M A K E _ T E X T _ S P A N >--------------------------------------------------

Renders text inside a <span> that carries the language attribute.

	code	value for the lang attribute
	text	the text to wrap
	rtl		'yes' adds dir="rtl" to the span and appends a left-to-right mark after it
	italic	'yes' wraps text in wiki italic markup; any other value leaves text as is
	size	when set, emitted as a css font-size style ({{lang}} only)

Returns the assembled markup as a string.

]]

local function make_text_span (code, text, rtl, italic, size)
	local is_rtl = ('yes' == rtl);
	local span = {};

	table.insert (span, '<span lang="');								-- open <span> tag
	table.insert (span, code);											-- language attribute
	table.insert (span, '"');
	if is_rtl then
		table.insert (span, ' dir="rtl"');								-- for right to left languages
	end
	if is_set (size) then												-- {{lang}} only
		table.insert (span, ' style="font-size:' .. size .. ';"');
	end
	table.insert (span, '>');											-- close the opening span tag

	if 'yes' == italic then
		table.insert (span, "''" .. text .. "''");						-- text with italic markup
	else
		table.insert (span, text);										-- DEFAULT: text is not italicized
	end

	table.insert (span, '</span>');										-- close the span
	if is_rtl then
		-- written as an entity, not as a literal U+200E, so the marker stays visible in the source and
		-- survives copy/paste; make sure the browser knows that we're at the end of the rtl
		table.insert (span, '&lrm;');
	end

	return table.concat (span);											-- put it all together and done
end
--[[--------------------------< M A K E _ C A T E G O R Y >----------------------------------------------------

Builds the 'Articles containing ...-language text' category link for a language.

Returns an empty string (safe for concatenation) when the current page is not in namespace 0 or when nocat is
truthy.  Otherwise the category name uses 'explicitly cited English' for codes en and eng, 'constructed' for code
art, and language_name for everything else.

]]

local function make_category (code, language_name, nocat)
	if nocat or (0 ~= namespace) then									-- only categorize in article space
		return '';														-- return empty string for concatenation
	end

	local qualifier;
	if ('en' == code) or ('eng' == code) then
		qualifier = 'explicitly cited English';
	elseif 'art' == code then
		qualifier = 'constructed';
	else
		qualifier = language_name;
	end

	local cat = {'[[Category:Articles containing '};
	cat[#cat + 1] = qualifier;
	cat[#cat + 1] = '-language text]]';

	return table.concat (cat);
end
--[[--------------------------< M A K E _ T R A N S L I T >----------------------------------------------------

return translit <span>...</span> else return empty string

The value |script= is not used in {{transl}} for this purpose; instead it uses |code.  Because language scripts
are listed in the {{transl}} switches they are included in the data tables.  The script parameter is introduced
at {{Language with name and transliteration}}.  If |script= is set, this function uses it in preference to code.

	code			language code; lookup key when script is not set (or has no entry)
	language_name	used for the generic '<language_name> transliteration' tool tip
	translit		the transliterated text placed inside the span
	std				transliteration standard; selects a sub-table of the tool-tip table
	script			script key; preferred over code when set

Tool tip selection:
	neither std nor script	generic tool tip
	std and script			the entry for script under std; empty string returned when std or script is unknown
	std only				the entry for code under std, else that standard's 'default' entry; empty string
							returned when std is unknown
	script only				the NO_STD entry for script, else the NO_STD entry for code, else generic tool tip

An empty-string return tells the caller to emit an error message.

]]

local function make_translit (code, language_name, translit, std, script)
	local title_table = lang_data.translit_title_table;				-- table of transliteration standards and the language codes and scripts that apply to those standards
	local generic_title = (language_name or '') .. ' transliteration';	-- generic tool tip
	local title;

	if is_set (std) then
		local std_titles = title_table[std];
		if not std_titles then
			return '';													-- invalid standard (with or without script), set up for an error message
		end

		if is_set (script) then
			title = std_titles[script];
			if not title then
				return '';												-- script not known for this standard, set up for an error message
			end
		else															-- script not set, use language code
			title = std_titles[code] or std_titles['default'];			-- code doesn't match so use the standard's default
		end
	elseif is_set (script) then											-- here if script set but std not set
		local no_std_titles = title_table['NO_STD'];
		title = no_std_titles[script] or no_std_titles[code] or generic_title;
	else																-- when neither standard nor script specified
		title = generic_title;
	end

	return table.concat ({
		"''<span title=\"",
		title or '',													-- empty tool tip when the standard has neither a code entry nor a default
		'" class="Unicode" style="white-space:normal; text-decoration:none">',
		translit or '',
		"</span>''",
		});
end
--[[--------------------------< L A N G >----------------------------------------------------------------------

Entry point for {{lang}}: renders text in a language-tagged <span> and, where applicable, adds the language's
'Articles containing ...' category.

<includeonly>{{#invoke:lang|lang|code={{{1|}}}|text={{{2|}}}|rtl={{{rtl|}}}|italic={{{italic|}}}|size={{{size|}}}|nocat={{{nocat|}}}}}</includeonly>

|code = the BCP47 language code
|text = the displayed text in language specified by code
|rtl = boolean true identifies the language specified by code as a right-to-left language
|italic = 'yes' renders displayed text in italic font; default is not to italicize
|size = css keyword appropriate for use with css font-size:<size>
|nocat = boolean true inhibits normal categorization; error categories are not affected

Returns an error message instead when the language code is missing, malformed or unknown, when there is no
text, or when the text already carries wiki italic markup.

]]

function p.lang (frame)
	local args = getArgs (frame);
	local code, script, region, variant = get_ietf_parts (args.code);

	if not (code and script and region and variant) then				-- any nil part means the tag did not validate
		return make_error_msg (table.concat ({'{{lang}}: unknown language code: ', args.code or 'missing'}), args.nocat);
	end

	if not is_set (args.text) then
		return make_error_msg ('{{lang}}: no text', args.nocat);
	end

	if not is_set (args.italic) then
		args.italic = 'no';												-- DEFAULT for {{lang}} templates is to not italicize
	end

	if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then	-- italic or bold italic markup; plain bold is allowed
		return make_error_msg ('{{lang}}: text has italic markup', args.nocat);
	end

	local tag = format_ietf_tag (code, script, region, variant);		-- format to recommended subtag styles

	-- override names take precedence; table entries sometimes have multiple names, always take the first one
	local names = lang_data.override[code] or lang_name_table.lang[code];
	local language_name = names and names[1];

	local out = {};
	table.insert (out, make_text_span (tag, args.text, args.rtl, args.italic, args.size));
	-- categorize by the primary language subtag: make_category() compares against bare codes ('en', 'eng',
	-- 'art'), which never match a full tag such as en-GB
	table.insert (out, make_category (code, language_name, args.nocat));

	return table.concat (out);											-- put it all together and done
end
--[[--------------------------< L A N G _ X X >----------------------------------------------------------------

Entry point for the {{lang-xx}} templates: renders '<language name>: <text>' with the text in a language-tagged
<span>, optionally followed by a transliteration and a literal translation, and adds the language's category.

<includeonly>{{#invoke:lang|lang_xx|code=<code>|text={{{1|}}}|link={{{links|{{{link}}}}}}|rtl={{{rtl|}}}|nocat={{{nocat|}}}|italic={{{italic|}}}|lit={{{lit|}}}|translit={{{translit|}}}|script={{{script|}}}|std={{{std|}}}}}</includeonly>

|code = the BCP47 language code
|text = the displayed text in language specified by code
|link = boolean true (default) links language specified by code to associated language article
|rtl = boolean true identifies the language specified by code as a right-to-left language
|nocat = boolean true inhibits normal categorization; error categories are not affected
|italic = boolean true (default) renders displayed text in italic font
|lit = text that is a literal translation of text

for those {{lang-xx}} templates that support transliteration:
|translit = text that is a transliteration of text
|std = the standard that applies to the transliteration
|script = ISO 15924 script name; falls back to code

Returns an error message instead when the language code is missing, malformed or unknown, when there is no
text, when the text already carries wiki italic markup, or when |std= / |script= do not select a
transliteration tool tip.

]]

function p.lang_xx (frame)
	local args = getArgs (frame);

	if not is_set (args.italic) then
		args.italic = 'yes';											-- DEFAULT for {{lang-xx}} templates is to italicize
	end
	args.size = nil;													-- size not supported in {{lang-xx}}

	local code, script, region, variant = get_ietf_parts (args.code);

	if not (code and script and region and variant) then				-- any nil part means the tag did not validate
		return make_error_msg (table.concat ({'{{lang-xx}}: unknown language code: ', args.code or 'missing'}), args.nocat);
	end

	if not is_set (args.text) then
		return make_error_msg ('{{lang-xx}}: no text', args.nocat);
	end

	if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then	-- italic or bold italic markup; plain bold is allowed
		return make_error_msg ('{{lang-xx}}: text has italic markup', args.nocat);
	end

	local tag = format_ietf_tag (code, script, region, variant);		-- format to recommended subtag styles

	-- override names take precedence; table entries sometimes have multiple names, always take the first one
	local names = lang_data.override[code] or lang_name_table.lang[code];
	local language_name = names and names[1];

	local translit_script = args.script or language_name;				-- for translit prefer script over language
	local linked = ('no' ~= args.link);
	local out = {};

	if linked then
		table.insert (out, make_wikilink (language_name .. ' language', language_name));	-- language name with wikilink
	else
		table.insert (out, language_name);								-- language name without wikilink
	end
	table.insert (out, ': ');											-- separator
	table.insert (out, make_text_span (tag, args.text, args.rtl, args.italic, args.size));

	if is_set (args.translit) then										-- transliteration (not supported in {{lang}}); not supported in all {{lang-xx}}
		table.insert (out, ', <small>');

		-- test for the existence of the very page that gets linked; .exists is only read when a link is wanted
		local romanization = 'Romanization of ' .. translit_script;
		local romanization_title = linked and mw.title.makeTitle (0, romanization);
		if romanization_title and romanization_title.exists then
			table.insert (out, make_wikilink (romanization, 'translit.'));
		else
			table.insert (out, '<abbr title="transliteration">translit.</abbr>');
		end
		table.insert (out, ' </small>');

		local translit = make_translit (tag, language_name, args.translit, args.std, args.script);
		if not is_set (translit) then									-- empty string means std and / or script did not select a tool tip
			return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit std: \'', args.std or 'missing', '\' or script: \'', args.script or 'missing', '\''}), args.nocat);
		end
		table.insert (out, translit);
	end

	if is_set (args.lit) then											-- translation (not supported in {{lang}})
		table.insert (out, ', <small>');
		if linked then
			table.insert (out, make_wikilink ('Literal translation', 'lit.'));
		else
			table.insert (out, '<abbr title="literal translation">lit.</abbr>');
		end
		table.insert (out, " </small>'");								-- literal translation is shown in single quotes
		table.insert (out, args.lit);
		table.insert (out, "'");
	end

	-- categorize by the primary language subtag: make_category() compares against bare codes ('en', 'eng',
	-- 'art'), which never match a full tag such as en-GB
	table.insert (out, make_category (code, language_name, args.nocat));

	return table.concat (out);											-- put it all together and done
end
return p;