Wordclock/tools/luasrcdiet/lparser.lua
2021-02-05 21:32:46 +01:00

1287 lines
40 KiB
Lua

---------
-- Lua 5.1+ parser written in Lua.
--
-- This file is part of LuaSrcDiet, based on Yueliang material.
--
-- **Notes:**
--
-- * This is a version of the native 5.1.x parser from Yueliang 0.4.0,
-- with significant modifications to handle LuaSrcDiet's needs:
-- (1) needs pre-built token tables instead of a module.method,
-- (2) lparser.error is an optional error handler (from llex),
-- (3) not full parsing, currently fakes raw/unlexed constants,
-- (4) parser() returns globalinfo, localinfo tables.
-- * NO support for 'arg' vararg functions (LUA_COMPAT_VARARG).
-- * A lot of the parser is unused, but might later be useful for
-- full-on parsing and analysis.
-- * Relaxed parsing of statement to not require "break" to be the
-- last statement of block (Lua 5.2+).
-- * Added basic support for goto and label statements, i.e. parser
-- does not crash on them (Lua 5.2+).
----
local fmt = string.format
local gmatch = string.gmatch
local pairs = pairs
local M = {}
--[[--------------------------------------------------------------------
-- variable and data structure initialization
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- initialization: main variables
----------------------------------------------------------------------
-- grammar-only token tables, read by token position
local toklist       -- token table
local seminfolist   -- semantic information table
local toklnlist     -- line number table
local xreflist      -- cross-reference table
-- cursor and current-token state
local tpos          -- token position
local line          -- start line # for error messages
local lastln        -- last line # for ambiguous syntax chk
local tok           -- current token
local seminfo       -- semantic info of current token
local ln            -- line of current token
local xref          -- cross-reference of current token
local nameref       -- proper position of <name> token
-- function states
local fs            -- current function state
local top_fs        -- top-level function state
-- variable tracking
local globalinfo    -- global variable information table
local globallookup  -- global variable name lookup table
local localinfo     -- local variable information table
local ilocalinfo    -- inactive locals (prior to activation)
local ilocalrefs    -- corresponding references to activate
local statinfo      -- statements labeled by type
-- forward references for local functions
local explist1
local expr
local block
local exp1
local body
local chunk
----------------------------------------------------------------------
-- initialization: data structures
----------------------------------------------------------------------
-- tokens that may follow a block; lookahead check in chunk(), returnstat()
local block_follow = {}
for word in gmatch("else elseif end until <eof>", "%S+") do
  block_follow[word] = true
end

-- binary operator priorities, keyed by operator token
local binopr_left = {}   -- left priority
local binopr_right = {}  -- right priority
do
  -- each entry is { operator, left priority, right priority }
  local priorities = {
    { "+", 6, 6 }, { "-", 6, 6 }, { "*", 7, 7 }, { "/", 7, 7 }, { "%", 7, 7 },
    { "^", 10, 9 }, { "..", 5, 4 },
    { "~=", 3, 3 }, { "==", 3, 3 },
    { "<", 3, 3 }, { "<=", 3, 3 }, { ">", 3, 3 }, { ">=", 3, 3 },
    { "and", 2, 2 }, { "or", 1, 1 },
  }
  for i = 1, #priorities do
    local entry = priorities[i]
    binopr_left[entry[1]] = entry[2]
    binopr_right[entry[1]] = entry[3]
  end
end

-- unary operators
local unopr = {
  ["not"] = true,
  ["-"] = true,
  ["#"] = true,
}
-- priority for unary operators
local UNARY_PRIORITY = 8
--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- formats error message and throws error (duplicated from llex)
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------
local function errorline(s, line)
  -- s: message text; line: optional line number, the current token's
  -- line (ln) is used when it is omitted
  local lineno = line or ln
  -- prefer the handler installed in M.error, else the built-in error()
  local raise = M.error or error
  raise(fmt("(source):%d: %s", lineno, s))
end
----------------------------------------------------------------------
-- handles incoming token, semantic information pairs
-- * NOTE: 'nextt' is named 'next' originally
----------------------------------------------------------------------
-- reads in next token
local function nextt()
  -- load the token at tpos into the current-token variables, then advance;
  -- note that lastln and ln are both taken from the same toklnlist entry
  local pos = tpos
  local curln = toklnlist[pos]
  lastln = curln
  tok, seminfo, ln, xref = toklist[pos], seminfolist[pos], curln, xreflist[pos]
  tpos = pos + 1
end
-- peek at next token (single lookahead for table constructor)
local function lookahead()
  -- tpos already points past the current token, so this is the token
  -- that the next call to nextt() would load; nothing is consumed
  local upcoming = toklist[tpos]
  return upcoming
end
----------------------------------------------------------------------
-- throws a syntax error, or if token expected is not there
----------------------------------------------------------------------
local function syntaxerror(msg)
  -- Reports msg followed by " near <token>" through errorline().
  -- * <number> and <string> tokens are shown as-is; any other token is
  --   quoted, with a <name> token replaced by its semantic info (the
  --   identifier text)
  -- * the text is built in a local so that the shared 'tok' state is left
  --   untouched; this matters when M.error is a handler that returns
  --   instead of throwing
  local near = tok
  if near ~= "<number>" and near ~= "<string>" then
    if near == "<name>" then near = seminfo end
    -- tostring() keeps the report usable even if no token is loaded
    near = "'"..tostring(near).."'"
  end
  errorline(msg.." near "..near)
end
local function error_expected(token)
  -- raise a syntax error stating that the quoted token was expected
  local quoted = "'"..token.."'"
  syntaxerror(quoted.." expected")
end
----------------------------------------------------------------------
-- tests for a token, returns outcome
-- * return value changed to boolean
----------------------------------------------------------------------
local function testnext(c)
  -- If the current token is c, consume it and return true; otherwise
  -- leave the token stream alone and return false.
  -- * false is returned explicitly so the result is always a boolean,
  --   as the header comment promises (it used to be nil on a mismatch)
  if tok == c then
    nextt()
    return true
  end
  return false
end
----------------------------------------------------------------------
-- check for existence of a token, throws error if not found
----------------------------------------------------------------------
local function check(c)
  -- nothing to do when the current token is the expected one;
  -- the token is not consumed here
  if tok == c then return end
  error_expected(c)
end
----------------------------------------------------------------------
-- verify existence of a token, then skip it
----------------------------------------------------------------------
local function checknext(c)
  -- the current token must be c ...
  check(c)
  -- ... and is then consumed
  nextt()
end
----------------------------------------------------------------------
-- throws error if condition not matched
----------------------------------------------------------------------
local function check_condition(c, msg)
  -- c: condition that must hold; msg: syntax error text used otherwise
  if c then return end
  syntaxerror(msg)
end
----------------------------------------------------------------------
-- verifies token conditions are met or else throw error
----------------------------------------------------------------------
local function check_match(what, who, where)
  -- what: closing token expected now; who: the opening token it closes;
  -- where: line on which the opening token was seen
  if testnext(what) then return end  -- found and consumed
  if where ~= ln then
    -- opener is on another line: mention it in the report
    local opener = " (to close '"..who.."' at line "..where..")"
    syntaxerror("'"..what.."' expected"..opener)
  else
    error_expected(what)
  end
end
----------------------------------------------------------------------
-- expect that token is a name, consume it and return the name
----------------------------------------------------------------------
local function str_checkname()
  check("<name>")
  -- capture the name and its cross-reference before the token is consumed
  local name, ref = seminfo, xref
  nameref = ref
  nextt()
  return name
end
--[[--------------------------------------------------------------------
-- variable (global|local|upvalue) handling
-- * to track locals and globals, variable management code needed
-- * entry point is singlevar() for variable lookups
-- * lookup tables (bl.locallist) are maintained awkwardly in the basic
-- block data structures, PLUS the function data structure (this is
-- an inelegant hack, since bl is nil for the top level of a function)
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- register a local variable, create local variable object, set in
-- to-activate variable list
-- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
-- localfunc(), localstat()
----------------------------------------------------------------------
local function new_localvar(name, special)
  -- lookup table of the current block, or of the function root when no
  -- block is open
  local locallist = (fs.bl or fs).locallist
  -- build the local variable object; nameref is where it was declared
  local info = {
    name = name,         -- local variable name
    xref = { nameref },  -- xref, first value is declaration
    decl = nameref,      -- location of declaration, = xref[1]
  }
  if special or name == "_ENV" then  -- "self" and "_ENV" must not be changed
    info.is_special = true
  end
  local id = #localinfo + 1
  localinfo[id] = info
  -- queue it as inactive; adjustlocalvars() makes it visible later, at
  -- which point it may override a same-named local in the same scope
  local slot = #ilocalinfo + 1
  ilocalinfo[slot] = id
  ilocalrefs[slot] = locallist
end
----------------------------------------------------------------------
-- actually activate the variables so that they are visible
-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
-- * used in parlist(), forbody(), localfunc(), localstat(), body()
----------------------------------------------------------------------
local function adjustlocalvars(nvars)
  -- activates the last nvars queued locals, walking left to right in
  -- order of allocation, because of something like:
  -- local a,a,a = 1,2,3 which gives a = 3
  local last = #ilocalinfo
  for slot = last - nvars + 1, last do
    local id = ilocalinfo[slot]        -- local's id
    local info = localinfo[id]
    local name = info.name             -- name of local
    info.act = xref                    -- set activation location
    local locallist = ilocalrefs[slot] -- lookup table to update
    ilocalinfo[slot] = nil             -- drop it from the inactive queue
    ilocalrefs[slot] = nil
    local shadowed = locallist[name]
    if shadowed then
      -- a same-named local is already active in this scope: mark it
      -- with the special removal value -id
      localinfo[shadowed].rem = -id
    end
    locallist[name] = id               -- activate, now visible to Lua
  end
end
----------------------------------------------------------------------
-- remove (deactivate) variables in current scope (before scope exits)
-- * zap entire locallist tables since we are not allocating registers
-- * used in leaveblock(), close_func()
----------------------------------------------------------------------
local function removevars()
  -- lookup table of the current block, or of the function root when no
  -- block is open
  local locallist = (fs.bl or fs).locallist
  -- stamp every local listed at this scope with the current
  -- cross-reference as its deactivation location
  for _, id in pairs(locallist) do
    localinfo[id].rem = xref
  end
end
----------------------------------------------------------------------
-- creates a new local variable given a name
-- * skips internal locals (those starting with '('), so internal
-- locals never needs a corresponding adjustlocalvars() call
-- * special is true for "self" which must not be optimized
-- * used in fornum(), forlist(), parlist(), body()
----------------------------------------------------------------------
local function new_localvarliteral(name, special)
  -- internal locals (names starting with '(') are not registered at all
  if name:sub(1, 1) ~= "(" then
    new_localvar(name, special)
  end
end
----------------------------------------------------------------------
-- search the local variable namespace of the given fs for a match
-- * returns localinfo index
-- * used only in singlevaraux()
----------------------------------------------------------------------
local function searchvar(fs, n)
  -- walk the chain of open blocks, innermost first
  local bl = fs.bl
  while bl do
    local locallist = bl.locallist
    if not locallist then break end
    local id = locallist[n]
    if id then return id end  -- found
    bl = bl.prev
  end
  -- fall back to the function root scope; -1 means not found
  return fs.locallist[n] or -1
end
----------------------------------------------------------------------
-- handle locals, globals and upvalues and related processing
-- * search mechanism is recursive, calls itself to search parents
-- * used only in singlevar()
----------------------------------------------------------------------
local function singlevaraux(fs, n, var)
  -- fs: function state to search; n: variable name; var: expression
  -- descriptor, receives k (and id when a local is found)
  if fs == nil then       -- no more levels?
    var.k = "VGLOBAL"     -- default is global variable
    return "VGLOBAL"
  end
  local id = searchvar(fs, n)  -- look up at current level
  if id >= 0 then
    var.k = "VLOCAL"
    var.id = id
    -- codegen may need to deal with upvalue here
    return "VLOCAL"
  end
  -- not found at current level; try upper one
  if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
    return "VGLOBAL"
  end
  -- was LOCAL or UPVAL in an enclosing function: upvalue in this level
  var.k = "VUPVAL"
  return "VUPVAL"
end
----------------------------------------------------------------------
-- consume a name token, creates a variable (global|local|upvalue)
-- * used in prefixexp(), funcname()
----------------------------------------------------------------------
local function singlevar(v)
  local name = str_checkname()
  singlevaraux(fs, name, v)
  ------------------------------------------------------------------
  -- variable tracking
  ------------------------------------------------------------------
  if v.k ~= "VGLOBAL" then
    -- local/upvalue is being accessed: append this use to its xref list
    local refs = localinfo[v.id].xref
    refs[#refs + 1] = nameref
    return
  end
  local id = globallookup[name]
  if id then
    -- known global: append this use to its xref list
    local refs = globalinfo[id].xref
    refs[#refs + 1] = nameref
  else
    -- first sighting of this global: create its object and remember it
    id = #globalinfo + 1
    globalinfo[id] = {
      name = name,         -- global variable name
      xref = { nameref },  -- xref, first value is declaration
    }
    globallookup[name] = id
  end
end
--[[--------------------------------------------------------------------
-- state management functions with open/close pairs
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- enters a code unit, initializes elements
----------------------------------------------------------------------
local function enterblock(isbreakable)
  -- push a fresh per-block state on top of the current function's chain
  fs.bl = {
    isbreakable = isbreakable,
    prev = fs.bl,    -- enclosing block, nil at function level
    locallist = {},  -- locals visible in this block
  }
end
----------------------------------------------------------------------
-- leaves a code unit, close any upvalues
----------------------------------------------------------------------
local function leaveblock()
  local current = fs.bl
  -- deactivate the block's locals while it is still the current block
  removevars()
  -- pop back to the enclosing block
  fs.bl = current.prev
end
----------------------------------------------------------------------
-- opening of a function
-- * top_fs is only for anchoring the top fs, so that parser() can
-- return it to the caller function along with useful output
-- * used in parser() and body()
----------------------------------------------------------------------
local function open_func()
  -- per-function state: the very first function opened reuses top_fs,
  -- which is created early; nested functions get a fresh table
  local new_fs = fs and {} or top_fs
  new_fs.prev = fs        -- linked list of function states
  new_fs.bl = nil         -- no block open yet
  new_fs.locallist = {}   -- function-level locals
  fs = new_fs
end
----------------------------------------------------------------------
-- closing of a function
-- * used in parser() and body()
----------------------------------------------------------------------
local function close_func()
  -- deactivate what is still listed at the current scope
  removevars()
  -- return to the enclosing function state
  local parent = fs.prev
  fs = parent
end
--[[--------------------------------------------------------------------
-- other parsing functions
-- * for table constructor, parameter list, argument list
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- parse a function name suffix, for function call specifications
-- * used in primaryexp(), funcname()
----------------------------------------------------------------------
local function field(v)
  -- field -> ['.' | ':'] NAME
  nextt()          -- skip the dot or colon
  str_checkname()  -- the field name itself; its value is not needed here
  v.k = "VINDEXED"
end
----------------------------------------------------------------------
-- parse a table indexing suffix, for constructors, expressions
-- * used in recfield(), primaryexp()
----------------------------------------------------------------------
local function yindex()
  -- index -> '[' expr ']'
  nextt()  -- skip the '['
  local key = {}  -- throwaway expression descriptor
  expr(key)
  checknext("]")
end
----------------------------------------------------------------------
-- parse a table record (hash) field
-- * used in constructor()
----------------------------------------------------------------------
local function recfield()
  -- recfield -> (NAME | '['exp1']') = exp1
  if tok ~= "<name>" then  -- tok == '['
    yindex()
  else
    str_checkname()
  end
  checknext("=")
  local value = {}  -- throwaway expression descriptor
  expr(value)
end
----------------------------------------------------------------------
-- parse a table list (array) field
-- * used in constructor()
----------------------------------------------------------------------
local function listfield(cc)
  -- the item is parsed into the constructor's shared descriptor cc.v
  local item = cc.v
  expr(item)
end
----------------------------------------------------------------------
-- parse a table constructor
-- * used in funcargs(), simpleexp()
----------------------------------------------------------------------
local function constructor(t)
  -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
  -- field -> recfield | listfield
  -- fieldsep -> ',' | ';'
  local opened = ln  -- line of the '{', for the closing-brace report
  local cc = { v = { k = "VVOID" } }
  t.k = "VRELOCABLE"
  checknext("{")
  while tok ~= "}" do
    -- closelistfield(cc) here
    local c = tok
    if c == "[" then
      recfield()
    elseif c == "<name>" and lookahead() == "=" then
      -- NAME followed by '=' is a record field
      recfield()
    else
      -- any other start, including NAME beginning an expression
      listfield(cc)
    end
    -- a field separator is needed to carry on with another field
    if not (testnext(",") or testnext(";")) then break end
  end
  check_match("}", "{", opened)
  -- lastlistfield(cc) here
end
----------------------------------------------------------------------
-- parse the arguments (parameters) of a function declaration
-- * used in body()
----------------------------------------------------------------------
local function parlist()
  -- parlist -> [ param { ',' param } ]
  local nparams = 0
  if tok ~= ")" then  -- is 'parlist' not empty?
    while true do
      local c = tok
      if c == "..." then
        nextt()
        fs.is_vararg = true
      elseif c == "<name>" then  -- param -> NAME
        new_localvar(str_checkname())
        nparams = nparams + 1
      else
        syntaxerror("<name> or '...' expected")
      end
      -- '...' must be last; otherwise a comma continues the list
      if fs.is_vararg or not testnext(",") then break end
    end
  end
  -- make the named parameters visible
  adjustlocalvars(nparams)
end
----------------------------------------------------------------------
-- parse the parameters of a function call
-- * contrast with parlist(), used in function declarations
-- * used in primaryexp()
----------------------------------------------------------------------
local function funcargs(f)
  local line = ln
  local c = tok
  if c == "<string>" then  -- funcargs -> STRING
    nextt()
  elseif c == "{" then     -- funcargs -> constructor
    constructor({})
  elseif c == "(" then     -- funcargs -> '(' [ explist1 ] ')'
    -- the current token's line is compared with lastln as recorded
    -- by nextt()
    if line ~= lastln then
      syntaxerror("ambiguous syntax (function call x new statement)")
    end
    nextt()
    if tok ~= ")" then     -- arg list is not empty?
      explist1()
    end
    check_match(")", "(", line)
  else
    syntaxerror("function arguments expected")
    return
  end
  f.k = "VCALL"
end
--[[--------------------------------------------------------------------
-- mostly expression functions
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- parses an expression in parentheses or a single variable
-- * used in primaryexp()
----------------------------------------------------------------------
local function prefixexp(v)
  -- prefixexp -> NAME | '(' expr ')'
  local c = tok
  if c == "<name>" then
    singlevar(v)
  elseif c == "(" then
    local opened = ln  -- line of the '(', for the closing-paren report
    nextt()
    expr(v)
    check_match(")", "(", opened)
  else
    syntaxerror("unexpected symbol")
  end
end
----------------------------------------------------------------------
-- parses a prefixexp (an expression in parentheses or a single
-- variable) or a function call specification
-- * used in simpleexp(), assignment(), expr_stat()
----------------------------------------------------------------------
local function primaryexp(v)
  -- primaryexp ->
  -- prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
  prefixexp(v)
  -- keep consuming suffixes until the current token starts none
  local more = true
  repeat
    local c = tok
    if c == "(" or c == "{" or c == "<string>" then  -- funcargs
      funcargs(v)
    elseif c == ":" then  -- ':' NAME funcargs
      nextt()
      str_checkname()
      funcargs(v)
    elseif c == "[" then  -- '[' exp1 ']'
      yindex()
    elseif c == "." then  -- field
      field(v)
    else
      more = false
    end
  until not more
end
----------------------------------------------------------------------
-- parses general expression types, constants handled here
-- * used in subexpr()
----------------------------------------------------------------------
local function simpleexp(v)
  -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
  -- constructor | FUNCTION body | primaryexp
  local c = tok
  -- forms handled by another function, which consumes its own tokens
  if c == "function" then
    nextt()
    body(false, ln)
    return
  end
  if c == "{" then  -- constructor
    constructor(v)
    return
  end
  -- single-token forms: pick the expression kind, then consume the token
  local kind
  if c == "<number>" then
    kind = "VKNUM"
  elseif c == "<string>" then
    kind = "VK"
  elseif c == "nil" then
    kind = "VNIL"
  elseif c == "true" then
    kind = "VTRUE"
  elseif c == "false" then
    kind = "VFALSE"
  elseif c == "..." then  -- vararg
    check_condition(fs.is_vararg == true,
                    "cannot use '...' outside a vararg function");
    kind = "VVARARG"
  else
    primaryexp(v)
    return
  end
  v.k = kind
  nextt()
end
------------------------------------------------------------------------
-- Parse subexpressions. Includes handling of unary operators and binary
-- operators. A subexpr is given the rhs priority level of the operator
-- immediately left of it, if any (limit is 0 if none), and if a binop
-- is found, limit is compared with the lhs priority level of the binop
-- in order to determine which executes first.
-- * recursively called
-- * used in expr()
------------------------------------------------------------------------
local function subexpr(v, limit)
  -- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
  -- * where 'binop' is any binary operator with a priority
  --   higher than 'limit'
  if unopr[tok] then
    -- unary operator: its operand is parsed at the unary priority
    nextt()
    subexpr(v, UNARY_PRIORITY)
  else
    simpleexp(v)
  end
  -- expand while operators have priorities higher than 'limit'
  local op = tok
  local prio = binopr_left[op]
  while prio and prio > limit do
    nextt()
    -- read sub-expression with higher priority; the call hands back
    -- the operator it stopped at
    op = subexpr({}, binopr_right[op])
    prio = binopr_left[op]
  end
  return op  -- return first untreated operator
end
----------------------------------------------------------------------
-- Expression parsing starts here. Function subexpr is entered with the
-- left operator (which is non-existent) priority of 0, which is lower
-- than all actual operators. Expr information is returned in parm v.
-- * used in cond(), explist1(), index(), recfield(), listfield(),
-- prefixexp(), while_stat(), exp1()
----------------------------------------------------------------------
-- this is a forward-referenced local
expr = function(v)
  -- expr -> subexpr
  -- start with limit 0; subexpr() only takes operators whose left
  -- priority is greater than the limit
  subexpr(v, 0)
end
--[[--------------------------------------------------------------------
-- third level parsing functions
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- parse a variable assignment sequence
-- * recursively called
-- * used in expr_stat()
------------------------------------------------------------------------
local function assignment(v)
  -- v.v is the descriptor of the target parsed just before this call;
  -- it must be something that can be assigned to
  local c = v.v.k
  local assignable = c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
                     or c == "VINDEXED"
  check_condition(assignable, "syntax error")
  if not testnext(",") then  -- assignment -> '=' explist1
    checknext("=")
    explist1()
    return
  end
  -- assignment -> ',' primaryexp assignment
  local nv = { v = {} }  -- expdesc
  primaryexp(nv.v)
  -- lparser.c deals with some register usage conflict here
  assignment(nv)
end
----------------------------------------------------------------------
-- parse a for loop body for both versions of the for loop
-- * used in fornum(), forlist()
----------------------------------------------------------------------
local function forbody(nvars)
  -- forbody -> DO block
  checknext("do")
  -- scope for declared variables: opened first so that the nvars
  -- loop variables are activated inside it
  enterblock(false)
  adjustlocalvars(nvars)
  block()
  leaveblock()  -- end of scope for declared variables
end
----------------------------------------------------------------------
-- parse a numerical for loop, calls forbody()
-- * used in for_stat()
----------------------------------------------------------------------
local function fornum(varname)
  -- fornum -> NAME = exp1, exp1 [, exp1] DO body
  -- control variables first, then the declared loop variable
  new_localvarliteral("(for index)")
  new_localvarliteral("(for limit)")
  new_localvarliteral("(for step)")
  new_localvar(varname)
  checknext("=")
  exp1()  -- initial value
  checknext(",")
  exp1()  -- limit
  if testnext(",") then
    exp1()  -- optional step; when absent the default step is 1
  end
  forbody(1)
end
----------------------------------------------------------------------
-- parse a generic for loop, calls forbody()
-- * used in for_stat()
----------------------------------------------------------------------
local function forlist(indexname)
  -- forlist -> NAME {, NAME} IN explist1 DO body
  -- create control variables
  new_localvarliteral("(for generator)")
  new_localvarliteral("(for state)")
  new_localvarliteral("(for control)")
  -- create declared variables: the first name was read by the caller,
  -- any further ones follow after commas
  new_localvar(indexname)
  local declared = 1
  while testnext(",") do
    local name = str_checkname()
    new_localvar(name)
    declared = declared + 1
  end
  checknext("in")
  explist1()
  forbody(declared)
end
----------------------------------------------------------------------
-- parse a function name specification
-- * used in func_stat()
----------------------------------------------------------------------
local function funcname(v)
  -- funcname -> NAME {field} [':' NAME]
  -- returns true when the name ends in a ':' NAME part, else false
  singlevar(v)
  while tok == "." do
    field(v)
  end
  if tok ~= ":" then
    return false
  end
  field(v)
  return true
end
----------------------------------------------------------------------
-- parse the single expressions needed in numerical for loops
-- * used in fornum()
----------------------------------------------------------------------
-- this is a forward-referenced local
exp1 = function()
  -- exp1 -> expr
  local e = {}  -- throwaway expression descriptor
  expr(e)
end
----------------------------------------------------------------------
-- parse condition in a repeat statement or an if control structure
-- * used in repeat_stat(), test_then_block()
----------------------------------------------------------------------
local function cond()
  -- cond -> expr
  local condition = {}  -- throwaway expression descriptor
  expr(condition)       -- read condition
end
----------------------------------------------------------------------
-- parse part of an if control structure, including the condition
-- * used in if_stat()
----------------------------------------------------------------------
local function test_then_block()
  -- test_then_block -> [IF | ELSEIF] cond THEN block
  nextt()            -- skip IF or ELSEIF
  cond()             -- the condition
  checknext("then")
  block()            -- 'then' part
end
----------------------------------------------------------------------
-- parse a local function statement
-- * used in local_stat()
----------------------------------------------------------------------
local function localfunc()
  -- localfunc -> NAME body
  local name = str_checkname()
  new_localvar(name)
  -- activated before the body is parsed, so the name is already
  -- visible inside the function body
  adjustlocalvars(1)
  body(false, ln)
end
----------------------------------------------------------------------
-- parse a local variable declaration statement
-- * used in local_stat()
----------------------------------------------------------------------
local function localstat()
  -- localstat -> NAME {',' NAME} ['=' explist1]
  local nvars = 0
  while true do
    new_localvar(str_checkname())
    nvars = nvars + 1
    if not testnext(",") then break end
  end
  if testnext("=") then
    explist1()
  end
  -- activated only after the initializers were parsed, so those
  -- cannot refer to the names being declared
  adjustlocalvars(nvars)
end
----------------------------------------------------------------------
-- parse a list of comma-separated expressions
-- * used in return_stat(), localstat(), funcargs(), assignment(),
-- forlist()
----------------------------------------------------------------------
-- this is a forward-referenced local
explist1 = function()
  -- explist1 -> expr { ',' expr }
  -- one descriptor table is reused for every expression in the list
  local e = {}
  repeat
    expr(e)
  until not testnext(",")
end
----------------------------------------------------------------------
-- parse function declaration body
-- * used in simpleexp(), localfunc(), func_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
body = function(needself, line)
  -- body -> '(' parlist ')' chunk END
  -- needself: true to declare an implicit "self" local
  -- line: line number quoted if the closing 'end' is missing
  open_func()
  checknext("(")
  if needself then
    -- "self" is registered as special and activated straight away
    new_localvarliteral("self", true)
    adjustlocalvars(1)
  end
  parlist()
  checknext(")")
  chunk()
  check_match("end", "function", line)
  close_func()
end
----------------------------------------------------------------------
-- parse a code block or unit
-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
-- if_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
function block()
  -- Grammar: block -> chunk
  --
  -- Wraps a chunk in its own block scope. The 'false' argument is
  -- presumably the "breakable" flag (break_stat() looks for
  -- bl.isbreakable) -- confirm against enterblock().
  enterblock(false)
  chunk()
  leaveblock()
end
--[[--------------------------------------------------------------------
-- second level parsing functions, all with '_stat' suffix
-- * they are local functions; stat() dispatches to them through the
-- stat_call lookup table defined further below
-- * stat() -> *_stat()
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- initial parsing for a for loop, calls fornum() or forlist()
-- * removed 'line' parameter (used to set debug information only)
-- * used in stat()
----------------------------------------------------------------------
local function for_stat()
  -- Grammar: stat -> for_stat -> FOR (fornum | forlist) END
  --
  -- Remember where the statement started: stat() overwrites the shared
  -- 'line' variable for every statement it parses, including nested ones.
  local startline = line
  enterblock(true)                   -- scope for loop and control variables
  nextt()                            -- skip 'for'
  local first = str_checkname()      -- first variable name
  local sep = tok                    -- token after the name selects the loop form
  if sep == "=" then
    fornum(first)                    -- numeric for
  elseif sep == "," or sep == "in" then
    forlist(first)                   -- generic for
  else
    syntaxerror("'=' or 'in' expected")
  end
  check_match("end", "for", startline)
  leaveblock()                       -- loop scope (`break' jumps to this point)
end
----------------------------------------------------------------------
-- parse a while-do control structure, body processed by block()
-- * used in stat()
----------------------------------------------------------------------
local function while_stat()
  -- Grammar: stat -> while_stat -> WHILE cond DO block END
  --
  -- Keep the starting line: the shared 'line' is reassigned by stat()
  -- while the loop body is being parsed.
  local startline = line
  nextt()                            -- skip WHILE
  cond()                             -- loop condition
  -- the breakable loop scope is entered only after the condition
  enterblock(true)
  checknext("do")
  block()
  check_match("end", "while", startline)
  leaveblock()
end
----------------------------------------------------------------------
-- parse a repeat-until control structure, body parsed by chunk()
-- * originally, repeatstat() calls breakstat() too if there is an
-- upvalue in the scope block; nothing is actually lexed, it is
-- actually the common code in breakstat() for closing of upvalues
-- * used in stat()
----------------------------------------------------------------------
local function repeat_stat()
  -- Grammar: stat -> repeat_stat -> REPEAT block UNTIL cond
  --
  -- Two nested blocks are opened: an outer breakable loop block and an
  -- inner scope block. The body is parsed with chunk() rather than
  -- block(), and cond() runs before the inner block is left.
  local startline = line             -- stat() overwrites 'line' for nested statements
  enterblock(true)                   -- loop block
  enterblock(false)                  -- scope block
  nextt()                            -- skip REPEAT
  chunk()
  check_match("until", "repeat", startline)
  cond()                             -- parsed while the scope block is still open
  leaveblock()                       -- finish scope
  leaveblock()                       -- finish loop
end
----------------------------------------------------------------------
-- parse an if control structure
-- * used in stat()
----------------------------------------------------------------------
local function if_stat()
  -- Grammar: stat -> if_stat -> IF cond THEN block
  --                             {ELSEIF cond THEN block} [ELSE block] END
  local startline = line             -- stat() overwrites 'line' for nested statements
  -- the IF part and every following ELSEIF part share one helper; keep
  -- going for as long as the helper stops on another 'elseif'
  repeat
    test_then_block()
  until tok ~= "elseif"
  if tok == "else" then
    nextt()                          -- skip ELSE
    block()                          -- 'else' part
  end
  check_match("end", "if", startline)
end
----------------------------------------------------------------------
-- parse a return statement
-- * used in stat()
----------------------------------------------------------------------
local function return_stat()
  -- Grammar: stat -> return_stat -> RETURN explist
  nextt()                            -- skip RETURN
  local following = tok
  -- a block-ending token or ';' directly after RETURN means there are
  -- no return values; anything else must start an expression list
  if not (block_follow[following] or following == ";") then
    explist1()
  end
end
----------------------------------------------------------------------
-- parse a break statement
-- * used in stat()
----------------------------------------------------------------------
local function break_stat()
  -- Grammar: stat -> break_stat -> BREAK
  --
  -- Walks outwards through the enclosing blocks of the current function
  -- (linked via .prev) and raises a syntax error if none of them is
  -- flagged as breakable.
  local scope = fs.bl
  nextt()                            -- skip BREAK
  while scope do
    if scope.isbreakable then
      return                         -- found an enclosing loop
    end
    scope = scope.prev
  end
  syntaxerror("no loop to break")
end
----------------------------------------------------------------------
-- parse a label statement
-- * this function has been added later, it just parses label statement
-- without any validation!
-- * used in stat()
----------------------------------------------------------------------
local function label_stat()
  -- Grammar: stat -> label_stat -> '::' NAME '::'
  --
  -- Syntax only: the label name is consumed and discarded, no check is
  -- made for duplicate or unused labels.
  nextt()                            -- opening '::'
  str_checkname()                    -- label name (result ignored)
  checknext("::")                    -- closing '::'
end
----------------------------------------------------------------------
-- parse a goto statement
-- * this function has been added later, it just parses goto statement
-- without any validation!
-- * used in stat()
----------------------------------------------------------------------
local function goto_stat()
  -- Grammar: stat -> goto_stat -> GOTO NAME
  --
  -- Syntax only: the target name is consumed and discarded, it is not
  -- matched against any label.
  nextt()                            -- skip GOTO
  str_checkname()                    -- target label name (result ignored)
end
----------------------------------------------------------------------
-- parse a function call with no returns or an assignment statement
-- * the struct with .prev is used for name searching in lparser.c,
-- so it is retained for now; present in assignment() also
-- * used in stat()
----------------------------------------------------------------------
local function expr_stat()
  -- Grammar: stat -> expr_stat -> func | assignment
  --
  -- Records in statinfo whether the statement turned out to be a call
  -- or an assignment. The slot index is computed before anything is
  -- parsed (stat() uses the same tpos - 1 index for keyword statements).
  local slot = tpos - 1
  local desc = {}                    -- descriptor filled in by primaryexp()
  local lhs = { v = desc }           -- wrapper with a .prev link, as in the C parser
  primaryexp(desc)
  if desc.k ~= "VCALL" then          -- stat -> assignment
    lhs.prev = nil                   -- first (and so far only) assignment target
    assignment(lhs)
    statinfo[slot] = "assign"
  else                               -- stat -> func
    -- call statement uses no results
    statinfo[slot] = "call"
  end
end
----------------------------------------------------------------------
-- parse a function statement
-- * used in stat()
----------------------------------------------------------------------
local function function_stat()
  -- Grammar: stat -> function_stat -> FUNCTION funcname body
  local startline = line             -- line of the statement, kept for body()
  nextt()                            -- skip FUNCTION
  -- funcname() reports whether the function needs an implicit 'self';
  -- that flag goes straight to body()
  body(funcname({}), startline)
end
----------------------------------------------------------------------
-- parse a simple block enclosed by a DO..END pair
-- * used in stat()
----------------------------------------------------------------------
local function do_stat()
  -- Grammar: stat -> do_stat -> DO block END
  local startline = line             -- stat() overwrites 'line' for nested statements
  nextt()                            -- skip DO
  block()
  check_match("end", "do", startline)
end
----------------------------------------------------------------------
-- parse a statement starting with LOCAL
-- * used in stat()
----------------------------------------------------------------------
local function local_stat()
  -- Grammar: stat -> local_stat -> LOCAL FUNCTION localfunc
  --                             -> LOCAL localstat
  nextt()                            -- skip LOCAL
  -- testnext() also consumes the FUNCTION keyword when it is present;
  -- localfunc is a function value, so the and/or selection is safe
  local parse_rest = testnext("function") and localfunc or localstat
  parse_rest()
end
--[[--------------------------------------------------------------------
-- main functions, top level parsing functions
-- * the exported entry point is M.parse(); init() is a local helper
-- * [entry] -> M.parse() -> chunk() -> stat()
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- initial parsing for statements, calls '_stat' suffixed functions
-- * used in chunk()
----------------------------------------------------------------------
local stat_call = {  -- first token of a statement -> its parser; used by stat()
  -- blocks and control structures
  ["do"] = do_stat,
  ["if"] = if_stat,
  ["while"] = while_stat,
  ["repeat"] = repeat_stat,
  ["for"] = for_stat,
  -- declarations
  ["local"] = local_stat,
  ["function"] = function_stat,
  -- control transfer
  ["return"] = return_stat,
  ["break"] = break_stat,
  ["goto"] = goto_stat,
  -- label statement
  ["::"] = label_stat,
}
local function stat()
  -- Grammar: stat -> if_stat while_stat do_stat for_stat repeat_stat
  --                  function_stat local_stat return_stat break_stat
  --                  goto_stat label_stat expr_stat
  --
  -- Returns true only after a RETURN statement (it must be the last
  -- statement of its chunk), false for every other statement.
  line = ln                          -- may be needed for error messages
  local first = tok
  local handler = stat_call[first]
  if not handler then
    -- no dedicated parser for this token: function call or assignment
    expr_stat()
    return false
  end
  -- handles: if while do for repeat function local return break goto ::
  statinfo[tpos - 1] = first
  handler()
  return first == "return"
end
----------------------------------------------------------------------
-- parse a chunk, which consists of a bunch of statements
-- * used in M.parse(), body(), block(), repeat_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
function chunk()
  -- Grammar: chunk -> { stat [';'] }
  --
  -- Parses statements until one of them reports that it must be the
  -- last (stat() returns true) or the current token ends the block.
  local finished = false
  while not (finished or block_follow[tok]) do
    finished = stat()
    testnext(";")                    -- optional statement separator
  end
end
----------------------------------------------------------------------
-- initialization function
----------------------------------------------------------------------
local function init(tokorig, seminfoorig, toklnorig)
  -- Resets the parser state and builds the grammar-only token tables
  -- from the lexer output.
  --
  -- tokorig:     token type strings ("TK_KEYWORD", "TK_NAME", ...)
  -- seminfoorig: semantic info, indexed in parallel with tokorig
  -- toklnorig:   line numbers, indexed in parallel with tokorig
  tpos = 1                           -- token position
  top_fs = {}                        -- reset top level function state
  ------------------------------------------------------------------
  -- set up grammar-only token tables; impedance-matching...
  -- note that constants returned by the lexer are source-level, so
  -- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING)
  ------------------------------------------------------------------
  local out = 1                      -- next free slot in the filtered tables
  toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
  for src = 1, #tokorig do
    local kind = tokorig[src]        -- lexer token type
    local gtok                       -- grammar-level token
    local is_grammar = true
    if kind == "TK_KEYWORD" or kind == "TK_OP" then
      -- keywords and operators are represented by their own text
      gtok = seminfoorig[src]
    elseif kind == "TK_NAME" then
      gtok = "<name>"
      seminfolist[out] = seminfoorig[src]
    elseif kind == "TK_NUMBER" then
      gtok = "<number>"
      seminfolist[out] = 0           -- fake!
    elseif kind == "TK_STRING" or kind == "TK_LSTRING" then
      gtok = "<string>"
      seminfolist[out] = ""          -- fake!
    elseif kind == "TK_EOS" then
      gtok = "<eof>"
    else
      -- non-grammar tokens; ignore them
      is_grammar = false
    end
    if is_grammar then               -- set rest of the information
      toklist[out] = gtok
      toklnlist[out] = toklnorig[src]
      xreflist[out] = src            -- index back into the original tables
      out = out + 1
    end
  end--for
  ------------------------------------------------------------------
  -- initialize data structures for variable tracking
  ------------------------------------------------------------------
  globalinfo, globallookup, localinfo = {}, {}, {}
  ilocalinfo, ilocalrefs = {}, {}
  statinfo = {}                      -- experimental
end
----------------------------------------------------------------------
-- performs parsing, returns parsed data structure
----------------------------------------------------------------------
function M.parse(tokens, seminfo, tokens_ln)
  -- Parses a complete token stream and returns the collected tables.
  --
  -- tokens:    lexer token type strings
  -- seminfo:   semantic info, parallel to tokens (this parameter shadows
  --            the file-level 'seminfo' inside this function only)
  -- tokens_ln: line numbers, parallel to tokens
  --
  -- Returns a table with the fields globalinfo, localinfo, statinfo,
  -- toklist, seminfolist, toklnlist and xreflist.
  init(tokens, seminfo, tokens_ln)
  open_func()
  fs.is_vararg = true                -- main func. is always vararg
  nextt()                            -- read first token
  chunk()
  check("<eof>")                     -- the whole input must have been consumed
  close_func()
  local result = {                   -- return everything
    globalinfo = globalinfo,
    localinfo = localinfo,
    statinfo = statinfo,
    toklist = toklist,
    seminfolist = seminfolist,
    toklnlist = toklnlist,
    xreflist = xreflist,
  }
  return result
end
-- export the module table (its public entry point is M.parse)
return M