Toggle menu
15
236
74
27.6K
Kenshi Wiki
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

Lua error in Module:Unicode_data/sandbox at line 297: attempt to index local 'data_module' (a boolean value).


local p = {}

local Unicode_data = require "Module:Unicode data/sandbox"

-- Raises an error whose message is produced by string.format.
-- errorf(level, fmt, ...) blames the stack level `level`, counted as if the
-- caller had invoked error itself; errorf(fmt, ...) blames the caller.
local function errorf(level, ...)
	local message, blame_level
	if type(level) ~= "number" then
		-- No level was given: the first argument is the format string.
		message, blame_level = string.format(level, ...), 2
	else
		-- One extra level to skip errorf's own stack frame.
		message, blame_level = string.format(...), level + 1
	end
	return error(message, blame_level)
end

-- Formats its arguments with string.format and passes the resulting message
-- to mw.log.
function mw.logf(...)
	local message = string.format(...)
	return mw.log(message)
end

-- Minimal string buffer. Pieces are stored at integer keys and the number of
-- pieces is tracked in the `n` field.
local output_mt = {
	-- Joins the stored pieces; takes the same extra arguments as table.concat.
	join = table.concat,
}
output_mt.__index = output_mt

-- Appends one piece to the buffer.
function output_mt.insert(self, str)
	local position = self.n + 1
	self.n = position
	self[position] = str
end

-- also in [[Module:Unicode data/documentation functions]]
-- Appends the result of calling string.format with the given arguments.
function output_mt.insert_format(self, ...)
	self:insert(string.format(...))
end

-- Creates an empty buffer.
local function Output()
	local buffer = { n = 0 }
	return setmetatable(buffer, output_mt)
end


-- Bracketed character set written as literal UTF-8 bytes. Each "a-b" item is
-- a range between two encoded characters; p.show strips the brackets and
-- expands every range into its individual characters.
local Latn_pattern = "["
	.. "\n\32-\127" -- newline, then U+0020-U+007F
	.. "\194\160-\194\172" -- U+00A0-U+00AC
	.. "\195\128-\195\191" -- U+00C0-U+00FF
	.. "\196\128-\197\191" -- U+0100-U+017F
	.. "\198\128-\201\143" -- U+0180-U+024F
	.. "\225\184\128-\225\187\191" -- U+1E00-U+1EFF
	.. "\226\177\160-\226\177\191" -- U+2C60-U+2C7F
	.. "\234\156\160-\234\159\191" -- U+A720-U+A7FF
	.. "\234\172\176-\234\173\175" -- U+AB30-U+AB6F
	.. "\239\172\128-\239\172\134" -- U+FB00-U+FB06
	.. "\239\188\129-\239\188\188" -- U+FF01-U+FF3C
	.. "–" -- U+2013
	.. "—" -- U+2014
	.. "«" .. "»" -- U+00AB, U+00BB
	.. "]"

local get_codepoint = mw.ustring.codepoint
-- Builds a string containing every character from `start` through `ending`
-- (inclusive), ordered by the code points that get_codepoint reports for the
-- two arguments. Returns nil when `ending` comes before `start`.
local function expand_range(start, ending)
	local first, last = get_codepoint(start), get_codepoint(ending)
	if last < first then
		return nil
	end
	local pieces = {}
	for codepoint = first, last do
		pieces[codepoint - first + 1] = mw.ustring.char(codepoint)
	end
	return table.concat(pieces)
end

local fun = require "Module:Fun"
local m_table = require "Module:TableTools"

-- Metatable for tally tables. It is also its own metatable, so calling
-- script_to_count_mt() creates a new tally table.
local script_to_count_mt = {}

-- A key that has not been seen yet reads as 0; the 0 is stored in the table
-- so that later reads find it directly.
function script_to_count_mt.__index(self, key)
	self[key] = 0
	return 0
end

-- Returns a fresh, empty table that uses the called table as its metatable.
function script_to_count_mt.__call(self)
	return setmetatable({}, self)
end

setmetatable(script_to_count_mt, script_to_count_mt)

-- Uses an iterator (such as mw.ustring.gcodepoint) that generates a codepoint
-- each time it is called with an optional state and another value.
-- Returns a string of "script (count)" entries separated by ", ", where the
-- script of each codepoint comes from Unicode_data.lookup_script and the
-- entries are ordered with a comparator that puts larger counts first.
local function show_scripts(iterator, state, value)
	local tally = script_to_count_mt()
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		-- Unseen scripts read as 0 through the tally's metatable.
		tally[script] = tally[script] + 1
	end

	local function by_descending_count(script1, script2)
		return tally[script1] > tally[script2]
	end

	local function describe(count, script)
		return string.format("%s (%d)", script, count)
	end

	local entries = fun.mapIter(
		describe,
		m_table.sortedPairs(tally, by_descending_count))
	return table.concat(entries, ", ")
end

-- Groups the codepoints produced by the iterator by the script that
-- Unicode_data.lookup_script reports for them.
-- Returns a table mapping each script to a set of codepoints, that is, a
-- table whose keys are the codepoints and whose values are true.
local function get_chars_in_scripts(iterator, state, value)
	local script_to_char_set = {}
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		local char_set = script_to_char_set[script]
		if not char_set then
			-- First codepoint seen for this script.
			char_set = {}
			script_to_char_set[script] = char_set
		end
		char_set[codepoint] = true
	end
	return script_to_char_set
end

-- Renders a script-to-codepoint-set map (as built by get_chars_in_scripts)
-- as text, one entry per script.
-- format: string.format pattern that receives the script and its characters
--   (default "%s: %s").
-- separator: string placed between entries (default "\n").
-- The characters of each entry are passed through mw.text.nowiki.
local function print_char_set_map(script_to_char_set, format, separator)
	if not format then
		format = "%s: %s"
	end
	if not separator then
		separator = "\n"
	end

	local function codepoint_to_char(_, codepoint)
		return mw.ustring.char(codepoint)
	end

	local function describe(char_set, script)
		local chars = fun.mapIter(
			codepoint_to_char,
			m_table.sortedPairs(char_set))
		local escaped = mw.text.nowiki(table.concat(chars))
		return (format:format(script, escaped))
	end

	local entries = fun.mapIter(
		describe,
		m_table.sortedPairs(script_to_char_set))
	return table.concat(entries, separator)
end

-- Displays every character covered by Latn_pattern as a list item, followed
-- by the script tally that show_scripts produces for those characters.
-- The frame argument is not used.
function p.show(frame)
	-- Drop the square brackets that surround the character set.
	local bare_set = Latn_pattern:gsub("%[(.-)%]", "%1")

	-- Find two UTF-8-encoded characters separated by hyphen-minus and replace
	-- them with the whole range. expand_range returns nil for a reversed
	-- range, in which case gsub keeps the original text.
	local expanded_pattern = bare_set:gsub(
		"([%z\1-\127\194-\244][\128-\191]*)%-([%z\1-\127\194-\244][\128-\191]*)",
		expand_range)

	-- Remove initial "\n " to avoid creating unwanted pre element.
	local displayed = expanded_pattern:gsub("^%s*", "")

	return ('* <div style="overflow-wrap: break-word;">%s</div><br>%s')
		:format(displayed, show_scripts(mw.ustring.gcodepoint(expanded_pattern)))
end

-- Looks up block information for the block named by one argument.
-- args: table of arguments (callers in this module pass frame.args).
-- arg: key of the argument that holds the block name.
-- Returns the value that Unicode_data.get_block_info gives for that name.
-- Raises an error if the argument is missing or if no block information is
-- returned for the name.
local function get_block_info_from_arg(args, arg)
	-- `arg` selects which argument holds the block name.
	local block_name = args[arg]
		or errorf("Parameter %s is required", tostring(arg))
	
	local block_info = Unicode_data.get_block_info(block_name)
		or errorf("The block '%s' could not be found", block_name)
	
	return block_info
end

-- Interprets args[arg] through Module:Yesno.
-- When the argument is absent (nil or false) it is returned unchanged and
-- Module:Yesno is not loaded.
local function get_boolean_from_arg(args, arg)
	local raw_value = args[arg]
	if not raw_value then
		return raw_value
	end
	local yesno = require "Module:Yesno"
	-- Parentheses keep the result to a single value.
	return (yesno(raw_value))
end

-- Lists the scripts found in one Unicode block with their counts.
-- frame.args[1]: name of the block (required).
-- frame.args[2]: when Module:Yesno evaluates it as true, the result is
--   prefixed with the third field of the block info and ": ".
function p.scripts_in_block(frame)
	local args = frame.args
	local block_info = get_block_info_from_arg(args, 1)
	local show_block_name = get_boolean_from_arg(args, 2)
	local script_list = show_scripts(fun.range(block_info[1], block_info[2]))
	if not show_block_name then
		return script_list
	end
	return string.format("%s: %s", block_info[3], script_list)
end

-- Wraps a block name in a wikilink. A name containing a space is linked
-- directly; a name without a space is linked to "<name> (Unicode block)" and
-- displayed as the bare name.
local function link_block_name(block_name)
	local template = "[[%s (Unicode block)|%s]]"
	if block_name:find(" ", 1, true) then
		template = "[[%s]]"
	end
	return (template:format(block_name, block_name))
end

-- Builds a wikitable that lists, for each Unicode block, the scripts found in
-- the block with a count and a sample of characters for each script.
-- frame.args[1]: hexadecimal number; blocks that start below it are skipped
--   (default 0).
-- frame.args[2]: hexadecimal number; processing stops at the first block
--   that starts above it (default 4000).
-- Returns the wikitext of the table.
function p.scripts_in_blocks(frame)
	local output = Output()
	local start = frame.args[1] and tonumber(frame.args[1], 16) or 0
	local ending = frame.args[2] and tonumber(frame.args[2], 16) or 0x4000
	
	local script_data = mw.loadData "Module:Unicode data/scripts"
	local singles = script_data.singles
	local ranges = script_data.ranges
	
	-- Removes every key from the table so that it can be reused for the next
	-- block.
	local function clear (self)
		for _, key in ipairs(m_table.keysToList(self, false)) do
			self[key] = nil
		end
	end
	
	-- Number of codepoints per script code in the current block.
	local counts = {}
	setmetatable(counts, {
		__index = {
			increment = function(self, script_code, amount)
				self[script_code] = (self[script_code] or 0) + (amount or 1)
			end,
			clear = clear,
		}
	})
	-- Sample codepoints per script code in the current block, shown as the
	-- tooltip of the count.
	local codepoints_per_script = {}
	setmetatable(codepoints_per_script, {
		__index = {
			-- Records a codepoint for the script. Codepoints U+0000-U+001F
			-- and U+0080-U+009F are skipped, and once 0x20 entries are stored
			-- the next accepted codepoint is replaced by three periods, after
			-- which nothing more is added.
			add = function(self, script_code, codepoint)
				self[script_code] = self[script_code] or { n = 0 }
				if self[script_code].n <= 0x20
						and not (codepoint <= 0x9F and (codepoint >= 0x80
						or codepoint <= 0x1F)) then
					if self[script_code].n == 0x20 then
						local period = ("."):byte()
						for _ = 1, 3 do
							self[script_code].n = self[script_code].n + 1
							self[script_code][self[script_code].n] = period
						end
					else
						if script_code == "Zinh" then -- probably combining character
							-- Put U+25CC before the character.
							self[script_code].n = self[script_code].n + 1
							self[script_code][self[script_code].n] = 0x25CC
						end
						self[script_code].n = self[script_code].n + 1
						self[script_code][self[script_code].n] = codepoint
					end
				end
			end,
			clear = clear,
		}
	})
	
	output:insert [[
{| class="wikitable"
|+ Scripts in each Unicode block
! block !! codepoints !! scripts
]]
	
	-- ipairs (not pairs) because the early exit below relies on visiting the
	-- blocks in ascending order.
	for _, block in ipairs(mw.loadData "Module:Unicode data/blocks") do
		local codepoint = block[1]
		if codepoint > ending then break end
		
		if codepoint >= start then
			while codepoint <= block[2] do
				local script = singles[codepoint]
				local count
				if script then -- Codepoint is in "singles" map.
					counts:increment(script)
					codepoints_per_script:add(script, codepoint)
					codepoint = codepoint + 1
					count = 1 -- for potential future use
				else
					local range, index = Unicode_data.binary_range_search(codepoint, ranges)
					if range then -- Codepoint is in "ranges" array.
						count = 0
						script = range[3]
						while codepoint <= range[2] and codepoint <= block[2] do
							count = count + 1
							codepoints_per_script:add(script, codepoint)
							codepoint = codepoint + 1
						end
						counts:increment(script, count)
					else -- Codepoint doesn't have data; it's Zzzz.
						-- Get range immediately above codepoint. There is
						-- none when codepoint lies past the last range, so
						-- stop at the end of the array.
						-- NOTE(review): assumes binary_range_search returns a
						-- usable index on a miss, as the loop always has.
						while ranges[index] and ranges[index][2] < codepoint do
							index = index + 1
						end
						
						count = 0
						script = "Zzzz"
						local next_range = ranges[index]
						while (not next_range or codepoint < next_range[1])
								and codepoint <= block[2]
								and not singles[codepoint] do
							count = count + 1
							codepoint = codepoint + 1
						end
						counts:increment(script, count)
					end
				end
			end
			
			output:insert_format([[
|-
| %s
| U+%04X&ndash;U+%04X
| %s
]], link_block_name(block[3]), block[1], block[2],
				table.concat(
					-- mapIter, as in show_scripts: sortedPairs is consumed
					-- as an iterator here.
					fun.mapIter(
						function (count, script)
							return ('<abbr title="%s">%s</abbr> (<span title="%s">%d</span>)')
								:format(
									script_data.aliases[script], script,
									codepoints_per_script[script]
										and mw.text.nowiki(mw.ustring.char(
											unpack(codepoints_per_script[script])))
										or "",
									count)
						end,
						m_table.sortedPairs(
							counts,
							function (script1, script2)
								return counts[script1] > counts[script2]
							end)),
					", "))
		end
		
		-- mw.logObject(codepoints_per_script, block[3])
		counts:clear()
		codepoints_per_script:clear()
	end
	output:insert "|}"
	
	return output:join()
end

-- Lists the characters of one Unicode block grouped by script.
-- frame.args[1]: name of the block (required).
-- frame.args[2]: when Module:Yesno evaluates it as true, the result is
--   prefixed with the third field of the block info and ": ".
function p.chars_in_scripts_in_block(frame)
	local args = frame.args
	local block_info = get_block_info_from_arg(args, 1)
	local show_block_name = get_boolean_from_arg(args, 2)
	local char_sets = get_chars_in_scripts(
		fun.range(block_info[1], block_info[2]))
	local script_char_set_map = print_char_set_map(char_sets)
	if not show_block_name then
		return script_char_set_map
	end
	return string.format("%s: %s", block_info[3], script_char_set_map)
end

-- Collects the language codes used in {{lang|code|...}} and {{lang-code|...}}
-- templates in the wikitext of a page.
-- frame.args[1]: title of the page to scan (default "English language").
-- Returns the codes joined with ", ". Returns nothing, after logging a
-- message, when no title object can be made or the page has no content.
function p.search_for_language_codes(frame)
	local page_name = frame.args[1] or "English language"
	
	local success, title_object = pcall(mw.title.new, page_name)
	if not (success and title_object) then
		mw.logf("Could not make title object for '%s'.", page_name)
		return
	end
	
	local content = title_object:getContent()
	if not content then
		-- getContent gives nil when there is no page content to read.
		mw.logf("Could not get content of '%s'.", page_name)
		return
	end
	
	-- Used as a set: keys are the language codes found.
	local language_codes = {}
	for lang_template in content:gmatch "{{lang[^}]+" do
		local template_name = lang_template:match("{{([^|}]+)")
		local language_code
		if template_name == "lang" then
			-- {{lang|code|...}}: the code is the first parameter.
			language_code = lang_template:match "{{lang|([^|}]+)"
		elseif template_name:find "^lang-" then
			-- {{lang-code|...}}: the code is part of the template name.
			language_code = lang_template:match "{{lang-([^|}]+)"
		end
		if language_code then
			language_codes[language_code] = true
		end
	end
	
	return table.concat(m_table.keysToList(language_codes), ", ")
end

return p