Toggle menu
13
222
68
27K
Kenshi Wiki
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

Visit the github repo.



-- <nowiki>
--- Lexer for Lua source code written in pure Lua.
--  @script             lexer
--  @license            MIT
--  @author             https://github.com/LoganDark
--  @param              {string} text Lua source code to lex.
--  @return             {table} Table of line arrays containing lexemes.

--- Builds a boolean lookup map from a character string or a list of keys.
--  A string contributes one key per character; a table contributes each of
--  its sequence elements as a key. Any other `src` type adds nothing.
--  @param              {string|table} src Characters or keywords to map.
--  @param[opt]         {table} list Table to extend by reference.
--  @return             {table} Map of the form `{ key = true, ... }`; this is
--                      `list` itself when one was supplied.
--  @local
local function lookupify(src, list)
	local map = list or {}
	local kind = type(src)

	if kind == 'string' then
		for index = 1, #src do
			map[src:sub(index, index)] = true
		end

		return map
	end

	if kind == 'table' then
		for index = 1, #src do
			map[src[index]] = true
		end
	end

	return map
end

--- Base identifier character set: ASCII letters and the underscore.
--  @variable           {string} base_ident
local base_ident = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'

--- Base decimal digit character set.
--  @variable           {string} base_digits
local base_digits = '0123456789'

--- Base single-character operator set.
--  @variable           {string} base_operators
local base_operators = '+-*/^%#'

--- Character class lookup tables used by the lexer.
--  Every field is a boolean map keyed by single characters. Some maps also
--  carry a nested boolean map stored under a multi-character key.
--  @table              chars
--  @field              {table} whitespace Boolean map of whitespace
--                      characters.
--  @field              {table} validEscapes Boolean map of characters that
--                      may follow a backslash in a quoted string.
--  @field              {table} ident Boolean map of identifier characters;
--                      `ident.start` maps the characters allowed first.
--  @field              {table} digits Boolean map of decimal digits;
--                      `digits.hex` maps hexadecimal digits.
--  @field              {table} symbols Boolean map of symbol and operator
--                      characters; `symbols.equality` maps comparison
--                      characters and `symbols.operators` maps operators.
--  @local
local chars = {}

chars.whitespace = lookupify(' \n\t\r')
chars.validEscapes = lookupify('abfnrtv"\'\\')

-- Letters, underscore and digits, plus the subset valid as a first character.
chars.ident = lookupify(base_ident .. base_digits, {
	start = lookupify(base_ident)
})

-- Decimal digits, plus the wider hexadecimal set.
chars.digits = lookupify(base_digits, {
	hex = lookupify(base_digits .. 'abcdefABCDEF')
})

-- Punctuation and operators, plus the two narrower classifications.
chars.symbols = lookupify(base_operators .. ',{}[]();.:', {
	equality = lookupify('~=><'),
	operators = lookupify(base_operators)
})

--- Lua keyword lookup tables.
--  @table              keywords
--  @field              structure Boolean map of structure keywords.
--  @field              values Boolean map of primitive value keywords.
local keywords = {}

-- Words the lexer reports with the `keyword` token type.
keywords.structure = lookupify({
	'and', 'break', 'do', 'else', 'elseif', 'end', 'for', 'function',
	'goto', 'if', 'in', 'local', 'not', 'or', 'repeat', 'return', 'then',
	'until', 'while'
})

-- Words the lexer reports with the `value` token type.
keywords.values = lookupify({ 'true', 'false', 'nil' })

--- Lexer function export.
--  Splits Lua source text into lines of tokens. Adjacent tokens of the same
--  type on one line are merged into a single token, and empty tokens are
--  never stored.
--  @param              {string} text Lua source code to lex.
--  @return             {table} Array of lines. Each line is an array of
--                      token tables with the fields `type` (token type
--                      name), `data` (token text), `posFirst` and `posLast`
--                      (positions relative to the start of the line).
return function(text)
	-- Index of the next character to consume.
	local pos = 1
	-- Index of the first character of the token currently being built.
	local start = 1
	-- Tokens collected for the current line.
	local buffer = {}
	-- Completed lines.
	local lines = {}

	--- Returns the character at `pos + delta` without consuming it.
	--  Yields an empty string when the index is outside the text.
	local function look(delta)
		delta = pos + (delta or 0)

		return text:sub(delta, delta)
	end

	--- Consumes and returns the next character ('' at end of input).
	local function get()
		pos = pos + 1

		return look(-1)
	end

	--- Reads the `=` run of a long bracket opener.
	--  Expects `pos` to be just after the first `[`. When the run of `=` is
	--  followed by a second `[`, consumes both and returns the number of `=`
	--  characters. Otherwise leaves `pos` untouched and returns nothing.
	local function getDataLevel()
		local num = 0

		while look(num) == '=' do
			num = num + 1
		end

		if look(num) == '[' then
			pos = pos + num + 1

			return num
		end
	end

	--- Returns the source text consumed since the last pushed token.
	local function getCurrentTokenText()
		return text:sub(start, pos - 1)
	end

	-- Number of characters pushed on the current line so far.
	local currentLineLength = 0
	-- Total length of all completed lines; subtracted to get line-relative
	-- token positions.
	local lineoffset = 0

	--- Emits the pending text as a token of the given type.
	--  When the last token in the buffer has the same type the text is
	--  appended to it instead of creating a new token.
	--  @param              {string} kind Token type name.
	--  @param[opt]         {string} data Token text; defaults to the text
	--                      consumed since the previous token.
	--  @return             {table} The token that received the text (it is
	--                      not stored in the buffer when its text is empty).
	local function pushToken(kind, data)
		data = data or getCurrentTokenText()

		local tk = buffer[#buffer]

		if not tk or tk.type ~= kind then
			tk = {
				type = kind,
				data = data,
				posFirst = start - lineoffset,
				posLast = pos - 1 - lineoffset
			}

			if tk.data ~= '' then
				buffer[#buffer + 1] = tk
			end
		else
			tk.data = tk.data .. data
			tk.posLast = tk.posLast + data:len()
		end

		currentLineLength = currentLineLength + data:len()
		start = pos

		return tk
	end

	--- Finishes the current line and consumes the newline character.
	--  Expects `pos` to be on the newline.
	local function newline()
		lines[#lines + 1] = buffer
		buffer = {}

		get()
		-- Pushed only to advance `start` and the line length; the newline
		-- token itself is discarded from the fresh buffer.
		pushToken('newline')
		buffer[1] = nil

		lineoffset = lineoffset + currentLineLength
		currentLineLength = 0
	end

	--- Consumes the rest of a single-line comment.
	--  Stops at end of input or at a newline; the newline, when present, is
	--  consumed through `newline()` after the comment token is pushed.
	local function chompLineComment()
		while true do
			local char = get()

			if char == '' or char == '\n' then
				pos = pos - 1
				pushToken('comment')

				if char == '\n' then
					newline()
				end

				return
			end
		end
	end

	--- Consumes the body of a long bracket string or comment.
	--  Pushes one token of type `kind` per line crossed. On a matching
	--  closing bracket, returns with `pos` on the first `]` of that bracket;
	--  at end of input, returns with `pos` past the end of the text.
	--  @param              {number} level Number of `=` in the opener.
	--  @param              {string} kind Token type used for the content.
	local function getData(level, kind)
		while true do
			local char = get()

			if char == '' then
				return
			elseif char == '\n' then
				pos = pos - 1
				pushToken(kind)
				newline()
			elseif char == ']' then
				local valid = true

				for i = 1, level do
					if look() == '=' then
						pos = pos + 1
					else
						valid = false
						break
					end
				end

				if valid and look() == ']' then
					-- Rewind to the first `]` so the caller can split the
					-- content from the closing bracket.
					pos = pos - level - 1

					return
				end
			end
		end
	end

	--- Consumes whitespace, including newlines, as `whitespace` tokens.
	local function chompWhitespace()
		while true do
			local char = look()

			if char == '\n' then
				pushToken('whitespace')
				newline()
			elseif chars.whitespace[char] then
				pos = pos + 1
			else
				break
			end
		end

		pushToken('whitespace')
	end

	while true do
		chompWhitespace()

		local char = get()

		if char == '' then
			break
		elseif char == '-' and look() == '-' then
			pos = pos + 1

			-- A `[` directly after `--` may open a long comment; `level`
			-- stays nil when it does not.
			local level

			if look() == '[' then
				pos = pos + 1
				level = getDataLevel()
			end

			if level then
				getData(level, 'comment')

				-- Skip the closing bracket: two `]` plus `level` `=`.
				pos = pos + level + 2
				pushToken('comment')
			else
				chompLineComment()
			end

			pushToken('comment')
		elseif char == '\'' or char == '"' then
			pushToken('string_start')

			while true do
				local char2 = get()

				if char2 == '\\' then
					-- Flush the string text before the backslash, then
					-- consume the backslash again.
					pos = pos - 1
					pushToken('string')
					get()

					local char3 = get()

					if chars.digits[char3] then
						-- Decimal escape: up to two more digits.
						for i = 1, 2 do
							if chars.digits[look()] then
								pos = pos + 1
							end
						end
					elseif char3 == 'x' then
						-- Hex escape: exactly two hex digits are required.
						if chars.digits.hex[look()] and chars.digits.hex[look(1)] then
							pos = pos + 2
						else
							pushToken('unidentified')
						end
					elseif char3 == '\n' then
						pos = pos - 1
						pushToken('escape')
						newline()
					elseif not chars.validEscapes[char3] then
						pushToken('unidentified')
					end

					pushToken('escape')
				elseif char2 == '\n' then
					-- Unescaped newline ends the string token on this line.
					pos = pos - 1
					pushToken('string')
					newline()

					break
				elseif char2 == char or char2 == '' then
					-- Matching quote or end of input.
					pos = pos - 1
					pushToken('string')
					get()

					break
				end
			end

			pushToken('string_end')
		elseif chars.ident.start[char] then
			while chars.ident[look()] do
				pos = pos + 1
			end

			local word = getCurrentTokenText()

			if keywords.structure[word] then
				pushToken('keyword')
			elseif keywords.values[word] then
				pushToken('value')
			else
				pushToken('ident')
			end
		elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
			-- Lua accepts both `0x` and `0X` as the hexadecimal prefix.
			if char == '0' and look():lower() == 'x' then
				pos = pos + 1

				while chars.digits.hex[look()] do
					pos = pos + 1
				end
			else
				while chars.digits[look()] do
					pos = pos + 1
				end

				if look() == '.' then
					pos = pos + 1

					while chars.digits[look()] do
						pos = pos + 1
					end
				end

				if look():lower() == 'e' then
					pos = pos + 1

					-- The exponent may carry an explicit sign of either kind.
					local sign = look()

					if sign == '-' or sign == '+' then
						pos = pos + 1
					end

					while chars.digits[look()] do
						pos = pos + 1
					end
				end
			end

			pushToken('number')
		elseif char == '[' then
			local level = getDataLevel()

			if level then
				pushToken('string_start')

				getData(level, 'string')
				pushToken('string')

				-- Skip the closing bracket: two `]` plus `level` `=`.
				pos = pos + level + 2
				pushToken('string_end')
			else
				pushToken('symbol')
			end
		elseif char == '.' then
			if look() == '.' then
				pos = pos + 1

				if look() == '.' then
					pos = pos + 1
				end
			end

			-- Three dots form a vararg; one or two dots are a symbol.
			if getCurrentTokenText():len() == 3 then
				pushToken('vararg')
			else
				pushToken('symbol')
			end
		elseif char == ':' and look() == ':' then
			get()

			pushToken('label_start')

			chompWhitespace()

			if chars.ident.start[look()] then
				get()

				while chars.ident[look()] do
					get()
				end

				pushToken('label')

				chompWhitespace()

				if look() == ':' and look(1) == ':' then
					get()
					get()

					pushToken('label_end')
				end
			end
		elseif chars.symbols.equality[char] then
			if look() == '=' then
				pos = pos + 1
			end

			pushToken('operator')
		elseif chars.symbols[char] then
			if chars.symbols.operators[char] then
				pushToken('operator')
			else
				pushToken('symbol')
			end
		else
			pushToken('unidentified')
		end
	end

	-- Store the final line, which has no terminating newline.
	lines[#lines + 1] = buffer

	return lines
end