-- Module:Links

local export = {}

--[=[
	Unsupported titles and pages with high memory usage
	are listed at Module:links/data.

	Other modules used:
		Module:script utilities
		Module:scripts
		Module:languages and its submodules
		Module:gender and number
		Module:utilities
		Module:string
		Module:debug
]=]

-- These are prefixed with u to avoid confusion with the default string methods
-- of the same name.
-- (The definition must live on its own line: when it shares a line with the
-- comment above, it becomes part of the comment and usub is never defined.)
local usub = mw.ustring.sub

-- Local aliases for the table library functions used throughout this module.
local table_insert = table.insert
local table_concat = table.concat

-- Language codes for which export.language_link strips "^" from the term
-- before linking.
local ignore_cap = { ko = true }

-- Language code → name of the module whose getTranslit function
-- export.full_link calls to obtain a transliteration.
local phonetic_extraction = {
	th = "Module:th",
	km = "Module:km",
}

-- Part-of-speech abbreviations and the full labels that
-- export.format_link_annotations shows in their place.
local pos_tags = {
	a = "adjective",
	adv = "adverb",
	int = "interjection",
	n = "noun",
	pron = "pronoun",
	v = "verb",
	vi = "intransitive verb",
	vt = "transitive verb",
	vti = "transitive and intransitive verb",
}

--- Work out which page a link to `target` in language `lang` should point at.
-- @param target  string: the raw link target.
-- @param lang    language object; its makeEntryName, getCode, getCanonicalName
--                and getType methods are called here.
-- @return the page title to link to, or nil when no link should be made
--         (unexpanded template parameters, or a "*"-prefixed term in "und").
-- Raises an error when `lang` is of type "reconstructed" but the entry name
-- does not start with "*".
function export.getLinkPage(target, lang)
	-- Look the table up once instead of loading it for the test and again
	-- for the result.
	local unsupported = mw.loadData("Module:links/data").unsupported_titles[target]
	if unsupported then
		return "Unsupported titles/" .. unsupported
	end

	-- If the link contains unexpanded template parameters, then don't create a link.
	if target:find("{{{", nil, true) then
		return nil
	end

	-- Targets with a leading colon or a Wikipedia prefix are passed through untouched.
	if target:sub(1, 1) == ":" or target:sub(1, 2) == "w:" or target:sub(1, 10) == "wikipedia:" then
		return target
	end

	-- Remove diacritics from the page name
	target = lang:makeEntryName(target)

	-- Link to appendix for reconstructed terms and terms in appendix-only languages
	-- ("%*" makes the literal asterisk explicit instead of relying on how an
	-- unescaped "*" happens to be parsed right after the anchor.)
	if target:find("^%*.") then
		if lang:getCode() == "und" then
			return nil
		end

		target = "Reconstruction:" .. lang:getCanonicalName() .. "/" .. usub(target, 2)
	elseif lang:getType() == "reconstructed" then
		error("The specified language " .. lang:getCanonicalName() .. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed")
	elseif lang:getType() == "appendix-constructed" then
		target = "Appendix:" .. lang:getCanonicalName() .. "/" .. target
	end

	return target
end

-- Language codes covered by the temporary Sami apostrophe tracking in makeLangLink.
local sami_tracking_codes = {
	sma = true, sju = true, sje = true, smj = true, se = true, smn = true,
	sia = true, sjk = true, sms = true, sjd = true, sjt = true,
}

-- Make a language-specific link from given link's parts.
--   link: table with fields target, display (optional) and fragment
--         (nil = not looked for yet, false = already looked for and absent).
--         The table is modified in place.
--   lang: language object (getCode and getCanonicalName are called here).
--   id: optional sense id; when given, it is turned into the fragment.
--   allowSelfLink: when falsy, a link to the current page becomes a self-link.
-- Returns wikitext: a piped wikilink, the bare display text when no page can
-- be linked, or self-link markup.
local function makeLangLink(link, lang, id, allowSelfLink)
	-- Temporary tracking code
	if sami_tracking_codes[lang:getCode()] then
		if link.display and string.find(link.display, "'") then
			require("Module:debug").track("links/Sami apostrophe display")
		elseif link.target and string.find(link.target, "'") then
			require("Module:debug").track("links/Sami apostrophe target")
		end
	end

	-- Find fragments (when link didn't come from parseLink).
	-- Prevents {{l|en|word#Etymology 2|word}} from linking to word.
	if link.fragment == nil then
		-- Replace numeric character references with the corresponding character (&#39; → '),
		-- as they contain #, which would be misinterpreted (wa'a → wa&#39;a → pagename wa&, fragment 39;a).
		link.target = link.target:gsub("&#(%d+);", function(number)
			return mw.ustring.char(tonumber(number))
		end)

		local first, second = link.target:match("^([^#]+)#(.+)$")
		if first then
			link.target, link.fragment = first, second
		end
	end

	-- If there is no display form, then create a default one
	if not link.display then
		link.display = link.target

		-- Strip the prefix from the displayed form
		-- TODO: other interwiki links?
		if link.display:sub(1, 1) == ":" and not mw.loadData("Module:links/data").unsupported_titles[link.display] then
			link.display = link.display:sub(2) -- remove colon from beginning
		else
			local prefix = link.display:match("^([^:]+):")
			local prefixes = {
				w = true,
				wikipedia = true,
			}

			if prefix and prefixes[prefix] then
				link.display = link.display:sub(#prefix + 2) -- remove prefix plus colon
			end
		end
	end

	-- Process the target
	link.target = export.getLinkPage(link.target, lang)

	if not link.target then
		return link.display
	end

	-- If the target is the same as the current page, then return a "self-link" like the software does
	if not allowSelfLink and not id then
		-- getCurrentTitle is a function and has to be called to obtain the title object.
		local current = mw.title.getCurrentTitle().prefixedText
		if link.target == current or link.target == ":" .. current then
			-- NOTE(review): the markup around the display text was missing from
			-- this literal; restored to MediaWiki's self-link form — confirm.
			return '<strong class="selflink">' .. link.display .. "</strong>"
		end
	end

	--[[
		Add fragment
		Do not add a section link to "Undetermined", as such sections do not exist and are invalid.
		TabbedLanguages handles links without a section by linking to the "last visited" section,
		but adding "Undetermined" would break that feature.
		For localized prefixes that make syntax error, please use the format: ["xyz"] = true,
	]]
	local prefix = link.target:match("^:?([^:]+):")
	local prefixes = {
		w = true,
		wikipedia = true,
		Category = true,
	}

	if not (prefix and prefixes[prefix]) then
		if link.fragment or link.target:find("#$") then
			require("Module:debug").track {
				"links/fragment",
				"links/fragment/" .. lang:getCode()
			}
		end

		if not link.fragment and lang:getCode() ~= "und" then
			if id then
				link.fragment = require("Module:utilities").make_id(lang, id)
			elseif not mw.ustring.find(link.target, "^Appendix:")
					and not mw.ustring.find(link.target, "^Reconstruction:") then
				link.fragment = lang:getCanonicalName()
			end
		end

		-- This allows linking to pages like sms:a without it being treated weirdly.
		link.target = link.target:gsub(":", "&#x3a;")
	end

	-- Assemble the piped wikilink. Without this the target and fragment
	-- computed above would never reach the output.
	return "[[" .. link.target .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
end

-- Split a link into its parts.
--   linktext: the text found between "[[" and "]]".
-- Returns a table with:
--   target   – text before the first "|" (or the whole text), minus any fragment
--   display  – text after the first "|", or the unsplit target when there is no "|"
--   fragment – text after the last "#" of the target, or false when there is none
local function parseLink(linktext)
	local link = { target = linktext }

	local first, second = link.target:match("^([^|]+)|(.+)$")
	if first then
		link.target = first
		link.display = second
	else
		link.display = link.target
	end

	first, second = link.target:match("^(.+)#(.+)$")
	if first then
		link.target = first
		link.fragment = second
	else
		-- So that makeLangLink does not look for a fragment again
		link.fragment = false
	end

	return link
end

-- Creates a basic wikilink to the given term. If the text already contains
-- links, these are replaced with links to the correct section.
--   data: table; the fields read here are term, alt, id and lang
--         (a language object whose getCode method is called).
--   allowSelfLink, dontLinkRecons: passed through to makeLangLink.
-- Returns the resulting wikitext. Raises an error if `data` is not a table.
function export.language_link(data, allowSelfLink, dontLinkRecons)
	if type(data) ~= "table" then
		error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
	end

	local text = data.term

	if ignore_cap[data.lang:getCode()] and text then
		text = text:gsub("%^", "")
	end

	-- If the text begins with * and another character,
	-- then act as if each link begins with *
	local allReconstructed = false

	if text:find("^%*.") then
		allReconstructed = true
	end

	-- Do we have embedded wikilinks?
	-- (A plain search for "[[": searching for the empty string always
	-- succeeds and would make the else branch below unreachable.)
	if text:find("[[", nil, true) then
		--[=[
		Special:WhatLinksHere/Template:tracking/links/alt-ignored
		Special:WhatLinksHere/Template:tracking/links/id-ignored
		]=]
		if data.alt then
			require("Module:debug").track("links/alt-ignored")
			mw.log("(from Module:links)", "text with embedded wikilinks:", text, "ignored alt:", data.alt, "lang:", data.lang:getCode())
		end

		if data.id then
			require("Module:debug").track("links/id-ignored")
			mw.log("(from Module:links)", "text with embedded wikilinks:", text, "ignored id:", data.id, "lang:", data.lang:getCode())
		end

		-- Begins and ends with a wikilink tag
		if text:find("^%[%[(.+)%]%]$") then
			-- There are no [ ] in between.
			-- This makes the wikilink tag redundant.
			if text:find("^%[%[[^%[%]]+%]%]$") then
				require("Module:debug").track("links/redundant wikilink")
			else
				local temp = text:gsub("^%[%[(.+)%]%]$", "%1")
				temp = temp:gsub("%]%], %[%[", "|")

				if not temp:find("[%[%]]") then
					require("Module:debug").track("links/list")
				end
			end
		end

		-- Rebuild every embedded link as a language-specific link.
		text = text:gsub("%[%[([^%]]+)%]%]", function(linktext)
			local link = parseLink(linktext)

			if allReconstructed then
				link.target = "*" .. link.target
			end

			return makeLangLink(link, data.lang, data.id, allowSelfLink, dontLinkRecons)
		end)

		-- Remove the extra * at the beginning if it's immediately followed
		-- by a link whose display begins with * too
		if allReconstructed then
			text = text:gsub("^%*%[%[([^|%]]+)|%*", "[[%1|*")
		end
	else
		-- There is no embedded wikilink, make a link using the parameters.
		text = makeLangLink({ target = text, display = data.alt }, data.lang, data.id, allowSelfLink, dontLinkRecons)
	end

	return text
end

-- Wrap `text` in the opening/closing strings that belong to `itemType`
-- ("gloss", "tr", "ts" or "annotations"; any other type adds nothing).
-- Returns "" when `text` is not a string.
-- `face` is only consulted for itemType "tr"; `lang` is accepted but not read.
function export.mark(text, itemType, face, lang)
	-- Nothing to wrap: the result is empty whatever the item type is.
	if type(text) ~= "string" then
		return ""
	end

	local before, after = "", ""

	if itemType == "gloss" then
		before, after = " “ ", " ” "
	elseif itemType == "tr" then
		-- Both faces currently produce the same pair of strings.
		if face == "term" then
			before, after = "", " "
		else
			before, after = "", " "
		end
	elseif itemType == "ts" then
		before, after = " /", "/ "
	elseif itemType == "annotations" then
		before, after = " ( ", " ) "
	end

	return before .. text .. after
end

-- Format the annotations (things following the linked term).
--   data: table; the fields read here are interwiki, genders, tr, ts, gloss,
--         pos, lit and lang. data.genders is wrapped in a table in place when
--         it is not already one.
--   face: "term" selects the "term" transliteration kind, anything else "default".
-- Returns the concatenated annotation text ("" when there is nothing to show).
function export.format_link_annotations(data, face)
	local output = {}

	-- Interwiki link
	if data.interwiki then
		table_insert(output, data.interwiki)
	end

	-- Genders
	if type(data.genders) ~= "table" then
		data.genders = { data.genders }
	end

	if data.genders and #data.genders > 0 then
		local m_gen = require("Module:gender and number")
		table_insert(output, " " .. m_gen.format_list(data.genders, data.lang))
	end

	local annotations = {}

	-- Transliteration and transcription
	if data.tr or data.ts then
		local kind = (face == "term") and "term" or "default"

		if data.tr then
			local translit = require("Module:script utilities").tag_translit(data.tr, data.lang, kind)

			if data.ts then
				table_insert(annotations, translit .. " " .. export.mark(data.ts, "ts"))
			else
				table_insert(annotations, translit)
			end

			-- Transliteration that contains a /.../ transcription.
			-- NOTE(review): the inserted string is empty here; it looks like
			-- it once carried tracking wikitext — confirm what belongs here.
			if mw.ustring.match(data.tr, "/[^><]*/") ~= nil then
				table_insert(annotations, "")
			end
		else
			table_insert(annotations, export.mark(data.ts, "ts"))
		end
	end

	-- Gloss/translation
	if data.gloss then
		table_insert(annotations, export.mark(data.gloss, "gloss"))
	end

	-- Part of speech
	if data.pos then
		table_insert(annotations, pos_tags[data.pos] or data.pos)

		-- debug category for pos= containing transcriptions
		-- NOTE(review): the inserted string is empty here as well — confirm.
		if mw.ustring.match(data.pos, "/[^><]*/") ~= nil then
			table_insert(annotations, "")
		end
	end

	-- Literal/sum-of-parts meaning
	if data.lit then
		table_insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
	end

	if #annotations > 0 then
		table_insert(output, " " .. export.mark(table_concat(annotations, ", "), "annotations"))
	end

	return table_concat(output)
end

-- A version of {{l}} or {{m}} that can be called from other modules too.
--   data: table; the fields read here are term, alt, sc, lang, accel and tr,
--         plus those read by export.format_link_annotations. data.sc and
--         data.tr may be filled in or replaced in place.
--   face: passed to the script-tagging function and to format_link_annotations.
--   allowSelfLink, dontLinkRecons: passed through to export.language_link.
-- Returns the link, its annotations and any categories as one wikitext string.
-- Raises an error if `data` is not a table.
function export.full_link(data, face, allowSelfLink, dontLinkRecons)
	if type(data) ~= "table" then
		error("The first argument to the function full_link must be a table. See Module:links/documentation for more information.")
	end

	-- Create the link
	local output = {}
	local categories = {}
	local link = ""

	-- Is there any text to show?
	if data.term or data.alt then
		-- Try to detect the script if it was not provided
		if not data.sc then
			data.sc = require("Module:scripts").findBestScript(data.alt or data.term, data.lang)
		else
			-- Track uses of sc parameter
			local best = require("Module:scripts").findBestScript(data.alt or data.term, data.lang)
			require("Module:debug").track("links/sc")

			if data.sc:getCode() == best:getCode() then
				require("Module:debug").track("links/sc/redundant")
				require("Module:debug").track("links/sc/redundant/" .. data.sc:getCode())
			else
				require("Module:debug").track("links/sc/needed")
				require("Module:debug").track("links/sc/needed/" .. data.sc:getCode())
			end
		end

		local class = ""

		if data.accel then
			class = "form-of lang-" .. data.lang:getCode() .. " " .. data.accel
		end

		-- Only make a link if the term has been given, otherwise just show the alt text without a link
		link = require("Module:script utilities").tag_text(
			data.term and export.language_link(data, allowSelfLink, dontLinkRecons) or data.alt,
			data.lang, data.sc, face, class)
	else
		-- No term to show.
		-- Is there at least a transliteration we can work from?
		link = require("Module:script utilities").request_script(data.lang, data.sc)

		if link == "" or not data.tr or data.tr == "-" then
			-- No link to show, and no transliteration either. Show a term request.
			if mw.title.getCurrentTitle().nsText ~= "Template" then
				-- NOTE(review): the string added here is empty; it presumably
				-- held category wikitext — confirm.
				table_insert(categories, "")
			end

			-- NOTE(review): any markup that surrounded this placeholder is not
			-- present in the literal — confirm.
			link = " [Term?] "
		end
	end

	table_insert(output, link)

	if data.tr == "" or data.tr == "-" then
		data.tr = nil
	elseif phonetic_extraction[data.lang:getCode()] then
		local m_phonetic = require(phonetic_extraction[data.lang:getCode()])
		data.tr = data.tr or m_phonetic.getTranslit(export.remove_links(data.term))
	elseif (data.term or data.alt) and not (data.sc:getCode():find("Latn", nil, true) or data.sc:getCode() == "Latinx") then
		-- On pages listed as high-memory entries, only transliterate when no
		-- manual transliteration was supplied.
		if not mw.loadData("Module:links/data").high_memory_entries[mw.title.getCurrentTitle().text] or not data.tr then
			-- Try to generate a transliteration if necessary
			local automated_tr = data.lang:transliterate(export.remove_links(data.alt or data.term), data.sc)

			if automated_tr then
				local manual_tr = data.tr

				if manual_tr then
					-- NOTE(review): both inserts below add an empty string;
					-- they presumably held tracking-category wikitext — confirm.
					if manual_tr == automated_tr then
						table_insert(categories, "")
					else
						-- Prevents Arabic root categories from flooding the tracking categories.
						if mw.title.getCurrentTitle().nsText ~= "Category" then
							table_insert(categories, "")
						end
					end
				end

				if (not manual_tr) or data.lang:overrideManualTranslit() then
					data.tr = automated_tr
				end
			end
		end
	end

	-- Link to the transliteration entry for languages that require this
	if data.tr and data.lang:link_tr() then
		data.tr = export.language_link { lang = data.lang, term = data.tr }
	end

	table_insert(output, export.format_link_annotations(data, face))

	return table_concat(output) .. table_concat(categories)
end

-- Strips links: deletes category links,
-- the targets of piped links,
-- and all double square brackets.
--   text: a string, or a table whose args[1] holds the string.
-- Returns the stripped string ("" when the input is nil or empty).
function export.remove_links(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	if not text or text == "" then
		return ""
	end

	-- Category links are removed entirely.
	text = mw.ustring.gsub(text, "%[%[Category:[^|%]]-|?[^|%]]-%]%]", "")
	-- Piped links lose "[[target|", leaving the display text and the closing brackets.
	text = text:gsub("%[%[[^|%]]-|", "")
	-- Whatever double brackets remain are dropped.
	text = text:gsub("%[%[", "")
	text = text:gsub("%]%]", "")

	return text
end

-- Turn every [[...]] link found in `text` into a link made by makeLangLink for
-- the language with code "en", with self-links allowed.
-- Returns the converted text.
function export.english_links(text)
	local lang = require("Module:languages").getByCode("en")

	-- Parentheses around function call to remove second return value, the
	-- number of replacements.
	return (text:gsub("%[%[([^%]]+)%]%]", function(linktext)
		local link = parseLink(linktext)
		return makeLangLink(link, lang, nil, true, false)
	end))
end

-- Build a link from plain codes, using the code tables loaded with mw.loadData
-- instead of language and script objects.
--   data: table; the fields read here are langCode (required), term (required),
--         scCode (required), id and alt. data.langName is set in place.
-- Returns the link wikitext.
-- Raises an error when a required field is missing or a code is not recognized.
function export.light_link(data)
	local language_names = mw.loadData("Module:languages/code to canonical name")
	local script_codes = mw.loadData("Module:scripts/codes")

	if data.langCode then
		data.langName = language_names[data.langCode]
			or error('The language code "' .. data.langCode .. '" is not recognized.')
	else
		error('Language code is required.')
	end

	if not data.term then
		error('Term to link to is required.')
	end

	if data.scCode then
		if not script_codes[data.scCode] then
			-- Report the code that was actually checked (data.scCode, not data.sc).
			error('The script code "' .. data.scCode .. '" is not recognized.')
		end
	else
		error("The function light_link requires a script code.")
	end

	local fragment

	if data.id then
		fragment = data.langName .. "-" .. mw.uri.encode(data.id, "WIKI")
	else
		fragment = data.langName
	end

	-- NOTE(review): the original return expression was not valid Lua (an
	-- unterminated string with its markup missing). The wikilink is rebuilt
	-- from the values computed above; the attributes of the surrounding span
	-- are an assumption — confirm.
	return table_concat {
		'<span class="', data.scCode, '" lang="', data.langCode, '">',
		"[[", data.term, "#", fragment, "|", (data.alt or data.term), "]]",
		"</span>"
	}
end

--[=[	For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål. 0xC3 and 0xA5 are the hexadecimal-base representation of the two bytes used to encode the character å in the UTF-8 encoding: 11000011 10100101	Note that the bytes used to represent a character are actually different from the Unicode codepoint. For å, the codepoint is 0xE5. The bits (digits) that actually spell the codepoint are found in the brackets: 110[00011] 10[100101]. For further explanation, see UTF-8. ]=]

-- The character class %x should not be used, as it includes the characters a-f,
-- which do not occur in these anchor encodings.
-- (The definition must live on its own line: when it shares a line with the
-- comment above, it becomes part of the comment and capitalHex stays nil.)
local capitalHex = "[0-9A-F]"

-- Replace every "." followed by two capital hexadecimal digits in `anchor`
-- with the byte those two digits denote.
-- Returns the decoded string only (the replacement count is dropped).
local function decodeAnchor(anchor)
	return (anchor:gsub("%.(" .. capitalHex .. capitalHex .. ")", function(hexByte)
		return string.char(tonumber(hexByte, 16))
	end))
end

-- Build a link of the form "page § section" from a "page#section" string.
--   link: string containing exactly one "#". Underscores are turned into
--         spaces and the section part is passed through decodeAnchor.
-- Returns the wikilink text.
-- Raises an error when `link` is not a string, contains more than one "#", or
-- has no "#" separating a non-empty page from a non-empty section.
function export.section_link(link)
	if type(link) ~= "string" then
		error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.")
	end

	link = link:gsub("_", " ")

	local numberSigns = require("Module:string").count(link, "#")

	if numberSigns > 1 then
		error("The section link should only contain one number sign (#).")
	end

	local page, section = link:match("^([^#]+)#(.+)$")

	if page and section then
		section = decodeAnchor(section)

		-- Link to the full "page#section" target and display "page § section".
		return table_concat { "[[", link, "|", page, " § ", section, "]]" }
	else
		-- The function name is written out: concatenating the undefined global
		-- section_link would itself fail and hide this message.
		error('The function "section_link" could not find a number sign marking a section name.')
	end
end

return export