Toggle menu
14
229
69
27.1K
Kenshi Wiki
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

This module is a dependency of Module:Get cell, which Template:Get cell relies on to display properly. The dependency is not documented elsewhere, but it is readily apparent from reading the code.


-- Module:WikitextParser is a general-purpose wikitext parser
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:WikitextParser
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382, et al.
-- License: CC-BY-SA-4.0
-- Public interface: the parser functions below are attached to this table, which the module returns
local WikitextParser = {}

-- Helper function to escape a string for literal use inside a Lua pattern
-- Every magic character ( ^ $ ( ) . [ ] * + - ? % ) gets prefixed with '%'.
-- @param str Required. String to escape.
-- @return The escaped string (exactly one value)
local function escapeString( str )
	-- gsub returns the new string AND the number of substitutions. The outer
	-- parentheses drop the count so it can never leak into a caller's argument
	-- list (for example as the 'init' or 'n' argument of another string function).
	return ( str:gsub( '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' ) )
end

-- Get the lead section from the given wikitext
-- The lead section is everything that precedes the first section title.
-- @param wikitext Required. Wikitext to parse.
-- @return Wikitext of the lead section. May be empty if the lead section is empty.
function WikitextParser.getLead( wikitext )
	-- Prepend a newline so that a title on the very first line is detected too
	local text = '\n' .. wikitext
	-- Cut the text right before the first line that starts with '=='
	local firstTitle = text:find( '\n==', 1, true )
	if firstTitle then
		text = text:sub( 1, firstTitle - 1 )
	end
	local lead = mw.text.trim( text )
	return lead
end

-- Get the sections from the given wikitext
-- This method doesn't get the lead section, use getLead for that
-- The content of a section ends at the next section title of any level.
-- If a title appears more than once, the content of its first instance is used.
-- @param wikitext Required. Wikitext to parse.
-- @return Map from section title (trimmed) to section content (trimmed)
function WikitextParser.getSections( wikitext )
	local sections = {}
	-- Leading newline: detect a title on the first line
	-- Trailing '\n==': sentinel that terminates the last section
	wikitext = '\n' .. wikitext .. '\n=='
	for rawTitle in wikitext:gmatch( '\n==+ *([^=]+) *==+' ) do
		-- Look the section up with the raw capture, so the pattern reproduces the title line exactly
		local section = wikitext:match( '\n==+ *' .. escapeString( rawTitle ) .. ' *==+(.-)\n==' )
		if section then
			-- The greedy capture keeps the whitespace before the closing '==',
			-- so trim it to get usable keys ('History' rather than 'History ')
			local title = mw.text.trim( rawTitle )
			sections[ title ] = mw.text.trim( section )
		end
	end
	return sections
end

-- Get a section from the given wikitext (including any subsections)
-- If the given section title appears more than once, only the section of the first instance will be returned
-- @param wikitext Required. Wikitext to parse.
-- @param title Required. Title of the section
-- @return Wikitext of the section, or nil if it isn't found. May be empty if the section is empty or contains only subsections.
function WikitextParser.getSection( wikitext, title )
	title = mw.text.trim( title )
	title = escapeString( title )
	wikitext = '\n' .. wikitext .. '\n'
	-- level: the run of '=' that opens the title; content: everything after the title line
	local level, content = wikitext:match( '\n(==+) *' .. title .. ' *==.-\n(.*)' )
	if content then
		-- The newline that ends the title line was consumed by the match above.
		-- Put one back, otherwise a title sitting on the very first line of the
		-- content (an empty section directly followed by the next one) would
		-- not be recognised and the next section would be returned by mistake.
		content = '\n' .. content
		-- A title with at most as many '=' as the requested one ends the section
		local nextSection = '\n==' .. string.rep( '=?', #level - 2 ) .. '[^=].*'
		content = content:gsub( nextSection, '' ) -- remove later sections at this level or higher
		content = mw.text.trim( content )
		return content
	end
end

-- Get the content of a <section> tag from the given wikitext.
-- We can't use getTags because both opening and closing <section> tags are self-closing tags.
-- @param wikitext Required. Wikitext to parse.
-- @param name Required. Name of the <section> tag
-- @return Content of the <section> tag, or nil if it isn't found. May be empty if the section tag is empty.
function WikitextParser.getSectionTag( wikitext, name )
	name = mw.text.trim( name )
	name = escapeString( name )
	-- Optional spaces and an optional single or double quote on either side of the name
	local quote = ' *["\']? *'
	-- Both tags tolerate spaces around '=' (the end tag used to reject a space before it)
	local beginTag = '< *section +begin *=' .. quote .. name .. quote .. '/>'
	local endTag = '< *section +end *=' .. quote .. name .. quote .. '/>'
	local content = wikitext:match( beginTag .. '(.-)' .. endTag )
	if content then
		return mw.text.trim( content )
	end
end

-- Get the lists from the given wikitext.
-- A list is a run of consecutive lines that each start with '*' or '#'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of lists.
function WikitextParser.getLists( wikitext )
	local lists = {}
	-- Leading newline: detect a list on the first line
	-- Two trailing newlines: guarantee a terminator after a list on the last line
	wikitext = '\n' .. wikitext .. '\n\n'
	local init = 1
	while true do
		-- A list ends at the first newline that isn't followed by another list item
		local _, stop, list = wikitext:find( '\n([*#].-)\n[^*#]', init )
		if not stop then
			break
		end
		lists[ #lists + 1 ] = list
		-- Resume at the newline that ended this list. Resuming after the whole
		-- match would swallow the newline that introduces a list starting
		-- right after a single blank line, and that list would be missed.
		init = stop - 1
	end
	return lists
end

-- Get the paragraphs from the given wikitext.
-- Lists, lines made only of a [[link]] (files, categories), tables, block
-- templates and section titles are removed first; what remains is split on
-- blank lines.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of paragraphs.
function WikitextParser.getParagraphs( wikitext )
	local paragraphs = {}

	-- Remove non-paragraphs
	wikitext = '\n' .. wikitext .. '\n'
	wikitext = wikitext:gsub( '\n[*#][^\n]*', '' ) -- remove lists

	-- Remove files and categories. Each removed line is replaced by a single
	-- newline so the lines around it aren't glued together. A pass can't see a
	-- line whose leading newline was consumed by the previous match, so repeat
	-- until nothing is left (e.g. a run of consecutive category lines).
	local removed
	repeat
		wikitext, removed = wikitext:gsub( '\n%[%b[]%]\n', '\n' )
	until removed == 0

	wikitext = wikitext:gsub( '\n%b{} *\n', '\n%0\n' ) -- add spacing between tables and block templates
	wikitext = wikitext:gsub( '\n%b{} *\n', '\n' ) -- remove tables and block templates
	wikitext = wikitext:gsub( '\n==+[^=]+==+ *\n', '\n' ) -- remove section titles
	wikitext = mw.text.trim( wikitext )

	-- Paragraphs are separated by one or more blank lines
	for paragraph in mw.text.gsplit( wikitext, '\n\n+' ) do
		if mw.text.trim( paragraph ) ~= '' then
			table.insert( paragraphs, paragraph )
		end
	end
	return paragraphs
end

-- Get the templates from the given wikitext.
-- Only outermost templates are returned; templates nested inside another
-- template are part of the outer template's wikitext.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of templates.
function WikitextParser.getTemplates( wikitext )
	local templates = {}
	-- '{' followed by a balanced '{...}' followed by '}' matches '{{...}}'
	for template in wikitext:gmatch( '{%b{}}' ) do
		-- Check the matched template itself (not the whole wikitext)
		if template:sub( 1, 3 ) ~= '{{#' then -- skip parser functions like #if
			table.insert( templates, template )
		end
	end
	return templates
end

-- Get the requested template from the given wikitext.
-- If the template appears more than once, only the first instance will be returned
-- Names are compared after trimming and upper-casing their first letter.
-- @param wikitext Required. Wikitext to parse.
-- @param name Name of the template to get
-- @return Wikitext of the template, or nil if it wasn't found
function WikitextParser.getTemplate( wikitext, name )
	local templates = WikitextParser.getTemplates( wikitext )
	local lang = mw.language.getContentLanguage()
	local wanted = lang:ucfirst( mw.text.trim( name ) )
	-- ipairs walks the sequence in order, so the first instance wins
	for _, template in ipairs( templates ) do
		local templateName = template:match( '^{{ *([^}|\n]+)' )
		-- templateName is nil when nothing precedes the first '|' or '}' (e.g. '{{|x}}').
		-- It keeps any whitespace before the first '|', hence the trim.
		if templateName and lang:ucfirst( mw.text.trim( templateName ) ) == wanted then
			return template
		end
	end
end

-- Get the parameters from the given template.
-- Unnamed parameters are keyed by their position (numbers starting at 1),
-- named parameters by their trimmed name. All values are trimmed.
-- @param template Required. Template wikitext to parse.
-- @return Map from parameter name to parameter value
function WikitextParser.getParameters( template )
	local parameters = {}
	local params = template:match( '{{[^|}]-|(.*)}}' )
	if params then
		-- Temporarily replace pipes and equal signs in subtemplates and links to avoid chaos
		-- '%' is doubled because the masked text is used as a gsub replacement string
		local placeholders = { ['%']='%%', ['|']='@@:@@', ['=']='@@_@@' }
		for subtemplate in params:gmatch( '{%b{}}' ) do
			-- Keep only the first result of gsub; passing its count along would cap the outer replacement
			local masked = subtemplate:gsub( '.', placeholders )
			params = params:gsub( escapeString( subtemplate ), masked )
		end
		-- '%[%b[]%]' matches a whole [[...]] link; the brackets must be escaped,
		-- an unescaped '[%b[]]' is read as a character set and never matches a link
		for link in params:gmatch( '%[%b[]%]' ) do
			local masked = link:gsub( '.', placeholders )
			params = params:gsub( escapeString( link ), masked )
		end
		local count = 0 -- number of unnamed parameters seen so far
		for param in mw.text.gsplit( params, '|' ) do
			local parts = mw.text.split( param, '=' )
			local name = mw.text.trim( parts[1] )
			local value
			if #parts == 1 then
				-- No equal sign: unnamed parameter, keyed by its position
				value = name
				count = count + 1
				name = count
			else
				-- Everything after the first equal sign is the value
				value = mw.text.trim( table.concat( parts, '=', 2 ) )
			end
			-- Restore the characters that were masked above
			value = value:gsub( '@@_@@', '=' )
			value = value:gsub( '@@:@@', '|' )
			parameters[ name ] = value
		end
	end
	return parameters
end

-- Get the tags from the given wikitext.
-- Anything that looks like '<...>' but whose name doesn't start with a letter
-- (comments like <!-- foo -->, comparisons like 1 < 2 > 1) is skipped.
-- A tag without a closing tag is returned as just its opening tag, except for
-- <div> and <span>, which extend to the end of the wikitext when unclosed.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of tags.
function WikitextParser.getTags( wikitext )
	local tags = {}
	local tag, tagName, tagEnd
	for tagStart, tagOpen in wikitext:gmatch( '()(<[^/].->)' ) do
		tagName = tagOpen:match( '< ?(.-)[ >]' )

		-- Tag names start with a letter; anything else isn't a tag and used to
		-- raise an error further down when no matching closing tag was found
		if tagName:match( '^%a' ) then

			-- If we're in a self-closing tag, like <ref name="foo" />, <references/>, <br/>, <br>, <hr>, etc.
			if tagOpen:match( '<.-/>' ) or tagName == 'br' or tagName == 'hr' then
				tag = tagOpen

			-- If we're in a tag that may contain others like it, like <div> or <span>
			elseif tagName == 'div' or tagName == 'span' then
				local position = tagStart + #tagOpen - 1
				local depth = 1
				while depth > 0 do
					tagEnd = wikitext:match( '</ ?' .. tagName .. ' ?>()', position )
					if tagEnd then
						tagEnd = tagEnd - 1
					else
						break -- unclosed tag
					end
					position = wikitext:match( '()< ?' .. tagName .. '[ >]', position + 1 )
					if not position then
						position = tagEnd + 1
					end
					-- An opening tag after the closing one belongs to a sibling, one before it is nested
					if position > tagEnd then
						depth = depth - 1
					else
						depth = depth + 1
					end
				end
				-- tagEnd is nil for an unclosed tag, in which case sub() runs to the end of the wikitext
				tag = wikitext:sub( tagStart, tagEnd )

			-- Else we're in tag that shouldn't contain others like it, like <math> or <strong>
			else
				-- Escape the name so characters like '-' aren't read as pattern operators
				tagEnd = wikitext:match( '</ ?' .. escapeString( tagName ) .. ' ?>()', tagStart )
				if tagEnd then
					tag = wikitext:sub( tagStart, tagEnd - 1 )
				else
					tag = tagOpen -- unclosed tag: keep the opening tag instead of raising an error
				end
			end
			table.insert( tags, tag )
		end
	end
	return tags
end

-- Get the <gallery> tags from the given wikitext.
-- Filters the result of getTags down to the tags named 'gallery'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of gallery tags.
function WikitextParser.getGalleries( wikitext )
	local galleries = {}
	for _, candidate in pairs( WikitextParser.getTags( wikitext ) ) do
		-- The name is whatever sits between '<' and the first space or '>'
		if candidate:match( '< ?(.-)[ >]' ) == 'gallery' then
			galleries[ #galleries + 1 ] = candidate
		end
	end
	return galleries
end

-- Get the <ref> tags from the given wikitext.
-- Filters the result of getTags down to the tags named 'ref'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of ref tags.
function WikitextParser.getReferences( wikitext )
	local references = {}
	for _, candidate in pairs( WikitextParser.getTags( wikitext ) ) do
		-- The name is whatever sits between '<' and the first space or '>'
		if candidate:match( '< ?(.-)[ >]' ) == 'ref' then
			references[ #references + 1 ] = candidate
		end
	end
	return references
end

-- Get the tables from the given wikitext.
-- A table is a balanced '{...}' block that begins a line with '{|'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of tables.
function WikitextParser.getTables( wikitext )
	local tables = {}
	-- Prepend a newline so a table on the very first line is detected too
	local text = '\n' .. wikitext
	for candidate in text:gmatch( '\n%b{}' ) do
		-- Balanced braces at the start of a line may also be a template; keep only tables
		local isTable = candidate:sub( 1, 3 ) == '\n{|'
		if isTable then
			local trimmed = mw.text.trim( candidate ) -- exclude the leading newline
			tables[ #tables + 1 ] = trimmed
		end
	end
	return tables
end

-- Get the id from the given table wikitext
-- The id is read from the 'id' attribute on the first line of the table.
-- @param t Required. Wikitext of the table to parse.
-- @return Id of the table or nil if not found
function WikitextParser.getTableId( t )
	-- First line only: '{|', anything, 'id=', an optionally quoted value, rest of the line
	local idPattern = '^{|[^\n]-id *= *["\']?([^"\'\n]+)["\']?[^\n]*\n'
	local id = string.match( t, idPattern )
	return id
end

-- Get a table by id from the given wikitext
-- Returns the first table whose id (as read by getTableId) equals the given id.
-- @param wikitext Required. Wikitext to parse.
-- @param id Required. Id of the table
-- @return Wikitext of the table or nil if not found
function WikitextParser.getTableById( wikitext, id )
	local tables = WikitextParser.getTables( wikitext )
	for index = 1, #tables do
		local candidate = tables[ index ]
		if WikitextParser.getTableId( candidate ) == id then
			return candidate
		end
	end
end

-- Get the data from the given table wikitext
-- The table is split into rows on '|-' and each row into cells; header cells
-- ('!') are treated like data cells ('|').
-- @param tableWikitext Required. Wikitext of the table to parse.
-- @return Table data: sequence of rows, each row a sequence of trimmed cell wikitext
-- @todo Test and make more robust
function WikitextParser.getTableData( tableWikitext )
	-- Substitutions applied in order to the whole table: { pattern, replacement }
	local tableCleanup = {
		{ '^{|.-\n', '' }, -- remove the header
		{ '\n|}$', '' }, -- remove the footer
		{ '^|%+.-\n', '' }, -- remove any caption
		{ '|%-.-\n', '|-\n' }, -- remove any row attributes
		{ '^|%-\n', '' }, -- remove any leading empty row
		{ '\n|%-$', '' }, -- remove any trailing empty row
	}
	-- Substitutions applied in order to each row, so that every cell ends up separated by '\n|'
	local rowCleanup = {
		{ '||', '\n|' }, -- inline data cell separator
		{ '!!', '\n|' }, -- inline header cell separator
		{ '\n!', '\n|' }, -- header cell on its own line
		{ '^!', '\n|' }, -- header cell at the start of the row
		{ '^\n|', '' }, -- drop the separator before the first cell
	}

	local text = mw.text.trim( tableWikitext )
	for _, step in ipairs( tableCleanup ) do
		text = string.gsub( text, step[1], step[2] )
	end

	local tableData = {}
	for rowWikitext in mw.text.gsplit( text, '|-', true ) do
		for _, step in ipairs( rowCleanup ) do
			rowWikitext = string.gsub( rowWikitext, step[1], step[2] )
		end
		local rowData = {}
		for cellWikitext in mw.text.gsplit( rowWikitext, '\n|' ) do
			rowData[ #rowData + 1 ] = mw.text.trim( cellWikitext )
		end
		tableData[ #tableData + 1 ] = rowData
	end
	return tableData
end

-- Get the internal links from the given wikitext (includes category and file links).
-- An internal link is a '[' followed by a balanced '[...]' followed by a ']'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of internal links.
function WikitextParser.getLinks( wikitext )
	local found = {}
	local linkPattern = '%[%b[]%]'
	for internalLink in wikitext:gmatch( linkPattern ) do
		found[ #found + 1 ] = internalLink
	end
	return found
end

-- Get the file links from the given wikitext.
-- A file link is an internal link whose namespace prefix resolves, through
-- mw.site.namespaces, to the namespace with canonical name 'File'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of file links.
function WikitextParser.getFiles( wikitext )
	local files = {}
	local links = WikitextParser.getLinks( wikitext )
	for _, link in ipairs( links ) do
		-- The namespace is the text before the FIRST colon. It can't contain
		-- '|' or ']', so a colon in the caption or in a nested link is never
		-- mistaken for the namespace separator.
		local namespace = link:match( '^%[%[ *([^:|%]]-) *:.' )
		if namespace and namespace ~= '' then
			local info = mw.site.namespaces[ namespace ]
			if info and info.canonicalName == 'File' then
				table.insert( files, link )
			end
		end
	end
	return files
end

-- Get the category links from the given wikitext.
-- A category link is an internal link whose namespace prefix resolves, through
-- mw.site.namespaces, to the namespace with canonical name 'Category'.
-- Links with a leading colon, like [[:Category:Foo]], are not included.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of category links.
function WikitextParser.getCategories( wikitext )
	local categories = {}
	local links = WikitextParser.getLinks( wikitext )
	for _, link in ipairs( links ) do
		-- The namespace is the text before the FIRST colon. It can't contain
		-- '|' or ']', so a colon in the title or sort key is never mistaken
		-- for the namespace separator.
		local namespace = link:match( '^%[%[ *([^:|%]]-) *:.' )
		if namespace and namespace ~= '' then
			local info = mw.site.namespaces[ namespace ]
			if info and info.canonicalName == 'Category' then
				table.insert( categories, link )
			end
		end
	end
	return categories
end

-- Get the external links from the given wikitext.
-- An external link is a balanced '[...]' whose content starts with '//',
-- 'http://' or 'https://'.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of external links.
function WikitextParser.getExternalLinks( wikitext )
	local externalLinks = {}
	for candidate in wikitext:gmatch( '%b[]' ) do
		local protocolRelative = candidate:match( '^%[//' )
		local http = candidate:match( '^%[https?://' )
		if protocolRelative or http then
			externalLinks[ #externalLinks + 1 ] = candidate
		end
	end
	return externalLinks
end

-- Export the table of parser functions as the value of the module
return WikitextParser