define(function(require, exports, module) { /*global exports:true module:true require:true define:true global:true */ (function (root, name, factory) { factory(exports) }(this, 'luaparse', function (exports) { 'use strict'; exports.version = '0.1.4'; var input, options, length; // Options can be set either globally on the parser object through // defaultOptions, or during the parse call. var defaultOptions = exports.defaultOptions = { // Explicitly tell the parser when the input ends. wait: false // Store comments as an array in the chunk object. , comments: true // Track identifier scopes by adding an isLocal attribute to each // identifier-node. , scope: false // Store location information on each syntax node as // `loc: { start: { line, column }, end: { line, column } }`. , locations: false // Store the start and end character locations on each syntax node as // `range: [start, end]`. , ranges: false }; // The available tokens expressed as enum flags so they can be checked with // bitwise operations. var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8 , NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64 , NilLiteral = 128, VarargLiteral = 256; exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral , Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral , Punctuator: Punctuator, BooleanLiteral: BooleanLiteral , NilLiteral: NilLiteral, VarargLiteral: VarargLiteral }; // As this parser is a bit different from luas own, the error messages // will be different in some situations. var errors = exports.errors = { unexpected: 'Unexpected %1 \'%2\' near \'%3\'' , expected: '\'%1\' expected near \'%2\'' , expectedToken: '%1 expected near \'%2\'' , unfinishedString: 'unfinished string near \'%1\'' , malformedNumber: 'malformed number near \'%1\'' }; // ### Abstract Syntax Tree // // The default AST structure is inspired by the Mozilla Parser API but can // easily be customized by overriding these functions. 
// Node factories. Every function returns a plain object whose `type` names
// the node; the remaining properties are the arguments, stored as given.
var ast = exports.ast = {
  labelStatement: function(label) {
    return {
      type: 'LabelStatement',
      label: label
    };
  },

  breakStatement: function() {
    return {
      type: 'BreakStatement'
    };
  },

  gotoStatement: function(label) {
    return {
      type: 'GotoStatement',
      label: label
    };
  },

  returnStatement: function(args) {
    return {
      type: 'ReturnStatement',
      'arguments': args
    };
  },

  ifStatement: function(clauses) {
    return {
      type: 'IfStatement',
      clauses: clauses
    };
  },

  ifClause: function(condition, body) {
    return {
      type: 'IfClause',
      condition: condition,
      body: body
    };
  },

  elseifClause: function(condition, body) {
    return {
      type: 'ElseifClause',
      condition: condition,
      body: body
    };
  },

  elseClause: function(body) {
    return {
      type: 'ElseClause',
      body: body
    };
  },

  whileStatement: function(condition, body) {
    return {
      type: 'WhileStatement',
      condition: condition,
      body: body
    };
  },

  doStatement: function(body) {
    return {
      type: 'DoStatement',
      body: body
    };
  },

  repeatStatement: function(condition, body) {
    return {
      type: 'RepeatStatement',
      condition: condition,
      body: body
    };
  },

  localStatement: function(variables, init) {
    return {
      type: 'LocalStatement',
      variables: variables,
      init: init
    };
  },

  assignmentStatement: function(variables, init) {
    return {
      type: 'AssignmentStatement',
      variables: variables,
      init: init
    };
  },

  callStatement: function(expression) {
    return {
      type: 'CallStatement',
      expression: expression
    };
  },

  // Note that the node type is `FunctionDeclaration`, not `FunctionStatement`.
  functionStatement: function(identifier, parameters, isLocal, body) {
    return {
      type: 'FunctionDeclaration',
      identifier: identifier,
      isLocal: isLocal,
      parameters: parameters,
      body: body
    };
  },

  forNumericStatement: function(variable, start, end, step, body) {
    return {
      type: 'ForNumericStatement',
      variable: variable,
      start: start,
      end: end,
      step: step,
      body: body
    };
  },

  forGenericStatement: function(variables, iterators, body) {
    return {
      type: 'ForGenericStatement',
      variables: variables,
      iterators: iterators,
      body: body
    };
  },

  chunk: function(body) {
    return {
      type: 'Chunk',
      body: body
    };
  },

  identifier: function(name) {
    return {
      type: 'Identifier',
      name: name
    };
  },

  // `type` is one of the literal token flags; anything that is not a string,
  // numeric, boolean or nil flag is reported as a `VarargLiteral`.
  literal: function(type, value, raw) {
    type = (type === StringLiteral) ? 'StringLiteral'
      : (type === NumericLiteral) ? 'NumericLiteral'
      : (type === BooleanLiteral) ? 'BooleanLiteral'
      : (type === NilLiteral) ? 'NilLiteral'
      : 'VarargLiteral';

    return {
      type: type,
      value: value,
      raw: raw
    };
  },

  tableKey: function(key, value) {
    return {
      type: 'TableKey',
      key: key,
      value: value
    };
  },

  tableKeyString: function(key, value) {
    return {
      type: 'TableKeyString',
      key: key,
      value: value
    };
  },

  tableValue: function(value) {
    return {
      type: 'TableValue',
      value: value
    };
  },

  tableConstructorExpression: function(fields) {
    return {
      type: 'TableConstructorExpression',
      fields: fields
    };
  },

  // `and` / `or` produce a `LogicalExpression`, every other operator a
  // `BinaryExpression`.
  binaryExpression: function(operator, left, right) {
    var type = ('and' === operator || 'or' === operator) ?
      'LogicalExpression' :
      'BinaryExpression';

    return {
      type: type,
      operator: operator,
      left: left,
      right: right
    };
  },

  unaryExpression: function(operator, argument) {
    return {
      type: 'UnaryExpression',
      operator: operator,
      argument: argument
    };
  },

  memberExpression: function(base, indexer, identifier) {
    return {
      type: 'MemberExpression',
      indexer: indexer,
      identifier: identifier,
      base: base
    };
  },

  indexExpression: function(base, index) {
    return {
      type: 'IndexExpression',
      base: base,
      index: index
    };
  },

  callExpression: function(base, args) {
    return {
      type: 'CallExpression',
      base: base,
      'arguments': args
    };
  },

  tableCallExpression: function(base, args) {
    return {
      type: 'TableCallExpression',
      base: base,
      'arguments': args
    };
  },

  stringCallExpression: function(base, argument) {
    return {
      type: 'StringCallExpression',
      base: base,
      argument: argument
    };
  },

  comment: function(value, raw) {
    return {
      type: 'Comment',
      value: value,
      raw: raw
    };
  }
};

// Wrap up the node object.
//
// When location tracking is active, the most recent `Marker` is popped,
// completed and its `loc` / `range` copied onto the node according to the
// enabled options. The node itself is returned.
function finishNode(node) {
  // Pop a `Marker` off the location-array and attach its location data.
  if (trackLocations) {
    var location = locations.pop();
    location.complete();
    if (options.locations) node.loc = location.loc;
    if (options.ranges) node.range = location.range;
  }
  return node;
}

// Helpers
// -------

var slice = Array.prototype.slice
  , toString = Object.prototype.toString
  // Return the index of the first element strictly equal to `element`,
  // or -1 when there is none.
  , indexOf = function indexOf(array, element) {
    for (var i = 0, length = array.length; i < length; i++) {
      if (array[i] === element) return i;
    }
    return -1;
  };

// Iterate through an array of objects and return the index of an object
// with a matching property, or -1 when no object matches.
function indexOfObject(array, property, element) {
  for (var i = 0, length = array.length; i < length; i++) {
    if (array[i][property] === element) return i;
  }
  return -1;
}

// A sprintf implementation using %index (beginning at 1) to input
// arguments in the format string. Only single-digit indexes are recognised.
//
// A placeholder without a matching argument is replaced by an empty string.
// (The previous `'' + arg || ''` concatenated first and therefore emitted the
// text "undefined" instead.)
//
// Example:
//
//     // Unexpected function in token
//     sprintf('Unexpected %2 in %1.', 'token', 'function');
function sprintf(format) {
  var args = slice.call(arguments, 1);
  format = format.replace(/%(\d)/g, function (match, index) {
    var arg = args[index - 1];
    return 'undefined' === typeof arg ? '' : '' + arg;
  });
  return format;
}

// Returns a new object with the properties from all objects passed as
// arguments. Last argument takes precedence. Only own properties are copied
// and none of the arguments is modified.
//
// Example:
//
//     this.options = extend(options, { output: false });
function extend() {
  var args = slice.call(arguments)
    , dest = {}
    , src, prop;

  for (var i = 0, length = args.length; i < length; i++) {
    src = args[i];
    for (prop in src) {
      // Borrow the method so sources without `Object.prototype` work too.
      if (Object.prototype.hasOwnProperty.call(src, prop)) {
        dest[prop] = src[prop];
      }
    }
  }
  return dest;
}

// ### Error functions

// #### Raise an exception.
//
// Raise an exception by passing a token, a string format and its parameters.
//
// The passed token's location will automatically be added to the error
// message if it exists, if not it will default to the lexer's current
// position.
//
// Example:
//
//     // [1:0] expected [ near (
//     raise(token, "expected %1 near %2", '[', token.value);
//
// `token` is anything that may carry `line`, `lineStart` and `range`; pass
// `{}` to report the lexer's current position instead. The thrown
// `SyntaxError` additionally exposes `line`, `column` and `index`.
//
// NOTE(review): the token branch reports a 0-based column while the lexer
// branch adds 1 - confirm which convention consumers expect.
function raise(token) {
  var message = sprintf.apply(null, slice.call(arguments, 1))
    , error, col;

  if ('undefined' !== typeof token.line) {
    col = token.range[0] - token.lineStart;
    error = new SyntaxError(sprintf('[%1:%2] %3', token.line, col, message));
    error.line = token.line;
    error.index = token.range[0];
    error.column = col;
  } else {
    col = index - lineStart + 1;
    error = new SyntaxError(sprintf('[%1:%2] %3', line, col, message));
    error.index = index;
    error.line = line;
    error.column = col;
  }
  throw error;
}

// #### Raise an unexpected token error.
//
// `type` is a description of what was expected; it is inserted into the
// message as-is.
//
// Example:
//
//     // <name> expected near '0'
//     raiseUnexpectedToken('<name>', token);
function raiseUnexpectedToken(type, token) {
  raise(token, errors.expectedToken, type, token.value);
}

// #### Raise a general unexpected error
//
// Usage should pass either a token object or a symbol string which was
// found. We can also specify a nearby token value; this will default to the
// value of the lookahead token.
//
// Example:
//
//     // Unexpected symbol 'end' near 'x'
//     unexpected(token);
//
// If there is no lookahead token yet (the lexer failed before the first
// lookahead was stored) the nearby value falls back to an empty string
// instead of crashing with a TypeError.
function unexpected(found, near) {
  if ('undefined' === typeof near) near = lookahead ? lookahead.value : '';
  if ('undefined' !== typeof found.type) {
    var type;
    switch (found.type) {
      case StringLiteral:   type = 'string';      break;
      case Keyword:         type = 'keyword';     break;
      case Identifier:      type = 'identifier';  break;
      case NumericLiteral:  type = 'number';      break;
      case Punctuator:      type = 'symbol';      break;
      case BooleanLiteral:  type = 'boolean';     break;
      case NilLiteral:
        // The token value is `null`, so spell out the literal.
        return raise(found, errors.unexpected, 'symbol', 'nil', near);
      default:
        // Token types without a dedicated name (e.g. vararg, EOF) used to
        // be reported as "undefined".
        type = 'symbol';
    }
    return raise(found, errors.unexpected, type, found.value, near);
  }
  return raise(found, errors.unexpected, 'symbol', found, near);
}

// Lexer
// -----
//
// The lexer, or the tokenizer reads the input string character by character
// and derives a token left-right.
To be as efficient as possible the lexer // prioritizes the common cases such as identifiers. It also works with // character codes instead of characters as string comparisons was the // biggest bottleneck of the parser. // // If `options.comments` is enabled, all comments encountered will be stored // in an array which later will be appended to the chunk object. If disabled, // they will simply be disregarded. // // When the lexer has derived a valid token, it will be returned as an object // containing its value and as well as its position in the input string (this // is always enabled to provide proper debug messages). // // `lex()` starts lexing and returns the following token in the stream. var index , token , previousToken , lookahead , comments , tokenStart , line , lineStart; exports.lex = lex; function lex() { skipWhiteSpace(); // Skip comments beginning with -- while (45 === input.charCodeAt(index) && 45 === input.charCodeAt(index + 1)) { scanComment(); skipWhiteSpace(); } if (index >= length) return { type : EOF , value: '' , line: line , lineStart: lineStart , range: [index, index] }; var charCode = input.charCodeAt(index) , next = input.charCodeAt(index + 1); // Memorize the range index where the token begins. tokenStart = index; if (isIdentifierStart(charCode)) return scanIdentifierOrKeyword(); switch (charCode) { case 39: case 34: // '" return scanStringLiteral(); // 0-9 case 48: case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: return scanNumericLiteral(); case 46: // . // If the dot is followed by a digit it's a float. 
if (isDecDigit(next)) return scanNumericLiteral(); if (46 === next) { if (46 === input.charCodeAt(index + 2)) return scanVarargLiteral(); return scanPunctuator('..'); } return scanPunctuator('.'); case 61: // = if (61 === next) return scanPunctuator('=='); return scanPunctuator('='); case 62: // > if (61 === next) return scanPunctuator('>='); return scanPunctuator('>'); case 60: // < if (61 === next) return scanPunctuator('<='); return scanPunctuator('<'); case 126: // ~ if (61 === next) return scanPunctuator('~='); return scanPunctuator('~'); case 58: // : if (58 === next) return scanPunctuator('::'); return scanPunctuator(':'); case 91: // [ // Check for a multiline string, they begin with [= or [[ if (91 === next || 61 === next) return scanLongStringLiteral(); return scanPunctuator('['); // \* / ^ % , { } ] ( ) ; # - + case 42: case 47: case 94: case 37: case 44: case 123: case 125: case 93: case 40: case 41: case 59: case 35: case 45: case 43: case 38: case 124: return scanPunctuator(input.charAt(index)); } return unexpected(input.charAt(index)); } // Whitespace has no semantic meaning in lua so simply skip ahead while // tracking the encounted newlines. Newlines are also tracked in all // token functions where multiline values are allowed. function skipWhiteSpace() { while (index < length) { var charCode = input.charCodeAt(index); if (isWhiteSpace(charCode)) { index++; } else if (isLineTerminator(charCode)) { line++; lineStart = ++index; } else { break; } } } // Identifiers, keywords, booleans and nil all look the same syntax wise. We // simply go through them one by one and defaulting to an identifier if no // previous case matched. function scanIdentifierOrKeyword() { var value, type; // Slicing the input string is prefered before string concatenation in a // loop for performance reasons. while (isIdentifierPart(input.charCodeAt(++index))); value = input.slice(tokenStart, index); // Decide on the token type and possibly cast the value. 
if (isKeyword(value)) { type = Keyword; } else if ('true' === value || 'false' === value) { type = BooleanLiteral; value = ('true' === value); } else if ('nil' === value) { type = NilLiteral; value = null; } else { type = Identifier; } return { type: type , value: value , line: line , lineStart: lineStart , range: [tokenStart, index] }; } // Once a punctuator reaches this function it should already have been // validated so we simply return it as a token. function scanPunctuator(value) { index += value.length; return { type: Punctuator , value: value , line: line , lineStart: lineStart , range: [tokenStart, index] }; } // A vararg literal consists of three dots. function scanVarargLiteral() { index += 3; return { type: VarargLiteral , value: '...' , line: line , lineStart: lineStart , range: [tokenStart, index] }; } // Find the string literal by matching the delimiter marks used. function scanStringLiteral() { var delimiter = input.charCodeAt(index++) , stringStart = index , string = '' , charCode; while (index < length) { charCode = input.charCodeAt(index++); if (delimiter === charCode) break; if (92 === charCode) { // \ string += input.slice(stringStart, index - 1) + readEscapeSequence(); stringStart = index; } // EOF or `\n` terminates a string literal. If we haven't found the // ending delimiter by now, raise an exception. else if (index >= length || isLineTerminator(charCode)) { string += input.slice(stringStart, index - 1); raise({}, errors.unfinishedString, string + String.fromCharCode(charCode)); } } string += input.slice(stringStart, index - 1); return { type: StringLiteral , value: string , line: line , lineStart: lineStart , range: [tokenStart, index] }; } // Expect a multiline string literal and return it as a regular string // literal, if it doesn't validate into a valid multiline string, throw an // exception. function scanLongStringLiteral() { var string = readLongString(); // Fail if it's not a multiline literal. 
if (false === string) raise(token, errors.expected, '[', token.value); return { type: StringLiteral , value: string , line: line , lineStart: lineStart , range: [tokenStart, index] }; } // Numeric literals will be returned as floating-point numbers instead of // strings. The raw value should be retrieved from slicing the input string // later on in the process. // // If a hexadecimal number is encountered, it will be converted. function scanNumericLiteral() { var character = input.charAt(index) , next = input.charAt(index + 1); var value = ('0' === character && 'xX'.indexOf(next || null) >= 0) ? readHexLiteral() : readDecLiteral(); return { type: NumericLiteral , value: value , line: line , lineStart: lineStart , range: [tokenStart, index] }; } // Lua hexadecimals have an optional fraction part and an optional binary // exoponent part. These are not included in JavaScript so we will compute // all three parts separately and then sum them up at the end of the function // with the following algorithm. // // Digit := toDec(digit) // Fraction := toDec(fraction) / 16 ^ fractionCount // BinaryExp := 2 ^ binaryExp // Number := ( Digit + Fraction ) * BinaryExp function readHexLiteral() { var fraction = 0 // defaults to 0 as it gets summed , binaryExponent = 1 // defaults to 1 as it gets multiplied , binarySign = 1 // positive , digit, fractionStart, exponentStart, digitStart; digitStart = index += 2; // Skip 0x part // A minimum of one hex digit is required. if (!isHexDigit(input.charCodeAt(index))) raise({}, errors.malformedNumber, input.slice(tokenStart, index)); while (isHexDigit(input.charCodeAt(index))) index++; // Convert the hexadecimal digit to base 10. digit = parseInt(input.slice(digitStart, index), 16); // Fraction part i optional. if ('.' 
=== input.charAt(index)) { fractionStart = ++index; while (isHexDigit(input.charCodeAt(index))) index++; fraction = input.slice(fractionStart, index); // Empty fraction parts should default to 0, others should be converted // 0.x form so we can use summation at the end. fraction = (fractionStart === index) ? 0 : parseInt(fraction, 16) / Math.pow(16, index - fractionStart); } // Binary exponents are optional if ('pP'.indexOf(input.charAt(index) || null) >= 0) { index++; // Sign part is optional and defaults to 1 (positive). if ('+-'.indexOf(input.charAt(index) || null) >= 0) binarySign = ('+' === input.charAt(index++)) ? 1 : -1; exponentStart = index; // The binary exponent sign requires a decimal digit. if (!isDecDigit(input.charCodeAt(index))) raise({}, errors.malformedNumber, input.slice(tokenStart, index)); while (isDecDigit(input.charCodeAt(index))) index++; binaryExponent = input.slice(exponentStart, index); // Calculate the binary exponent of the number. binaryExponent = Math.pow(2, binaryExponent * binarySign); } return (digit + fraction) * binaryExponent; } // Decimal numbers are exactly the same in Lua and in JavaScript, because of // this we check where the token ends and then parse it with native // functions. function readDecLiteral() { while (isDecDigit(input.charCodeAt(index))) index++; // Fraction part is optional if ('.' === input.charAt(index)) { index++; // Fraction part defaults to 0 while (isDecDigit(input.charCodeAt(index))) index++; } // Exponent part is optional. if ('eE'.indexOf(input.charAt(index) || null) >= 0) { index++; // Sign part is optional. if ('+-'.indexOf(input.charAt(index) || null) >= 0) index++; // An exponent is required to contain at least one decimal digit. if (!isDecDigit(input.charCodeAt(index))) raise({}, errors.malformedNumber, input.slice(tokenStart, index)); while (isDecDigit(input.charCodeAt(index))) index++; } return parseFloat(input.slice(tokenStart, index)); } // Translate escape sequences to the actual characters. 
// Called with `index` on the character following a backslash. Returns the
// text to append to the string value and advances `index` past the
// sequence.
function readEscapeSequence() {
  var sequenceStart = index;
  switch (input.charAt(index)) {
    // Lua allows the following escape sequences.
    // We don't escape the bell sequence.
    case 'n': index++; return '\n';
    case 'r': index++; return '\r';
    case 't': index++; return '\t';
    case 'v': index++; return '\x0B';
    case 'b': index++; return '\b';
    case 'f': index++; return '\f';
    // Skips the following span of white-space.
    case 'z': index++; skipWhiteSpace(); return '';
    // Byte representation should for now be returned as is.
    case 'x':
      // \xXX, where XX is a sequence of exactly two hexadecimal digits
      if (isHexDigit(input.charCodeAt(index + 1)) &&
          isHexDigit(input.charCodeAt(index + 2))) {
        index += 3;
        // Return it as is, without translating the byte.
        return '\\' + input.slice(sequenceStart, index);
      }
      return '\\' + input.charAt(index++);
    default:
      // \ddd, a run of decimal digits, returned as is (backslash included).
      if (isDecDigit(input.charCodeAt(index))) {
        while (isDecDigit(input.charCodeAt(++index)));
        return '\\' + input.slice(sequenceStart, index);
      }
      // Simply return the character as is, it's not escaping any sequence.
      return input.charAt(index++);
  }
}

// Comments begin with -- after which it will be decided if they are
// multiline comments or not.
//
// The multiline functionality works the exact same way as with string
// literals so we reuse the functionality.
//
// When `options.comments` is enabled a `Comment` node is pushed onto
// `comments`; otherwise the comment is only skipped.
function scanComment() {
  tokenStart = index;
  index += 2; // --

  var character = input.charAt(index)
    , content = ''
    , isLong = false
    , commentStart = index
    , lineStartComment = lineStart
    , lineComment = line;

  if ('[' === character) {
    content = readLongString();
    // This wasn't a multiline comment after all.
    if (false === content) content = character;
    else isLong = true;
  }
  // Scan until next line as long as it's not a multiline comment.
  if (!isLong) {
    while (index < length) {
      if (isLineTerminator(input.charCodeAt(index))) break;
      index++;
    }
    if (options.comments) content = input.slice(commentStart, index);
  }

  if (options.comments) {
    var node = ast.comment(content, input.slice(tokenStart, index));

    // `Marker`s depend on tokens available in the parser and as comments are
    // intercepted in the lexer all location data is set manually.
    if (options.locations) {
      node.loc = {
        start: { line: lineComment, column: tokenStart - lineStartComment }
        , end: { line: line, column: index - lineStart }
      };
    }
    if (options.ranges) {
      node.range = [tokenStart, index];
    }
    comments.push(node);
  }
}

// Read a multiline string by calculating the depth of `=` characters and
// then appending until an equal depth is found.
//
// Called with `index` on the opening `[`. Returns the content, or `false`
// when the brackets do not open a long string (in which case `index` has
// only moved past the first `[`). Raises `errors.unfinishedString` when the
// input ends before the closing bracket; previously that case returned the
// content with its last character dropped.
function readLongString() {
  var level = 0
    , content = ''
    , terminator = false
    , character, stringStart;

  index++; // [

  // Calculate the depth of the comment.
  while ('=' === input.charAt(index + level)) level++;
  // Exit, this is not a long string after all.
  if ('[' !== input.charAt(index + level)) return false;

  index += level + 1;

  // If the first character is a newline, ignore it and begin on next line.
  // `\r\n` is skipped as a whole, and `lineStart` points at the first
  // character of the new line, like everywhere else in the lexer.
  if (isLineTerminator(input.charCodeAt(index))) {
    if (13 === input.charCodeAt(index) && 10 === input.charCodeAt(index + 1)) index++;
    line++;
    lineStart = ++index;
  }

  stringStart = index;
  while (index < length) {
    character = input.charAt(index++);

    // We have to keep track of newlines as `skipWhiteSpace()` does not get
    // to scan this part. `\r\n` counts as one newline; the content is sliced
    // from the input afterwards so both characters are still kept.
    if (isLineTerminator(character.charCodeAt(0))) {
      if ('\r' === character && '\n' === input.charAt(index)) index++;
      line++;
      lineStart = index;
    }

    // Once the delimiter is found, iterate through the depth count and see
    // if it matches.
    if (']' === character) {
      terminator = true;
      for (var i = 0; i < level; i++) {
        if ('=' !== input.charAt(index + i)) terminator = false;
      }
      if (']' !== input.charAt(index + level)) terminator = false;
    }

    // We reached the end of the multiline string. Get out now.
    if (terminator) break;
  }
  // Ran out of input without finding the closing long bracket.
  if (!terminator) raise({}, errors.unfinishedString, '<eof>');

  content += input.slice(stringStart, index - 1);
  index += level + 1;

  return content;
}

// ## Lex functions and helpers.
// Read the next token.
//
// This is actually done by setting the current token to the lookahead and
// reading in the new lookahead token.
function next() {
  previousToken = token;
  token = lookahead;
  lookahead = lex();
}

// Consume a token if its value matches. Once consumed or not, return the
// success of the operation.
function consume(value) {
  if (value === token.value) {
    next();
    return true;
  }
  return false;
}

// Expect the next token value to match. If not, throw an exception.
function expect(value) {
  if (value === token.value) next();
  else raise(token, errors.expected, value, token.value);
}

// ### Validation functions
//
// All character predicates take a character code (as returned by
// `charCodeAt`), not a string.

// Tab, space, vertical tab or form feed.
function isWhiteSpace(charCode) {
  return 9 === charCode || 32 === charCode || 0xB === charCode || 0xC === charCode;
}

// Line feed or carriage return.
function isLineTerminator(charCode) {
  return 10 === charCode || 13 === charCode;
}

// 0-9
function isDecDigit(charCode) {
  return charCode >= 48 && charCode <= 57;
}

// 0-9, a-f or A-F
function isHexDigit(charCode) {
  return (charCode >= 48 && charCode <= 57) || (charCode >= 97 && charCode <= 102) || (charCode >= 65 && charCode <= 70);
}

// From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards
// identifiers cannot use locale-dependent letters.

// A-Z, a-z or underscore.
function isIdentifierStart(charCode) {
  return (charCode >= 65 && charCode <= 90) || (charCode >= 97 && charCode <= 122) || 95 === charCode;
}

// A-Z, a-z, underscore or 0-9.
function isIdentifierPart(charCode) {
  return (charCode >= 65 && charCode <= 90) || (charCode >= 97 && charCode <= 122) || 95 === charCode || (charCode >= 48 && charCode <= 57);
}

// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// `true`, `false` and `nil` will not be considered keywords, but literals.
// Is the identifier a reserved word? The candidates are grouped by length so
// that at most a handful of comparisons are made.
function isKeyword(id) {
  switch (id.length) {
    case 2:
      return 'do' === id || 'if' === id || 'in' === id || 'or' === id;
    case 3:
      return 'and' === id || 'end' === id || 'for' === id || 'not' === id;
    case 4:
      return 'else' === id || 'goto' === id || 'then' === id;
    case 5:
      return 'break' === id || 'local' === id || 'until' === id || 'while' === id;
    case 6:
      return 'elseif' === id || 'repeat' === id || 'return' === id;
    case 8:
      return 'function' === id;
  }
  return false;
}

// Is the token a unary operator: the punctuators `#`, `-`, `~` or the
// keyword `not`.
function isUnary(token) {
  if (Punctuator === token.type) return '#-~'.indexOf(token.value) >= 0;
  if (Keyword === token.type) return 'not' === token.value;
  return false;
}

// @TODO this needs to be rethought.
function isCallExpression(expression) {
  switch (expression.type) {
    case 'CallExpression':
    case 'TableCallExpression':
    case 'StringCallExpression':
      return true;
  }
  return false;
}

// Check if the token syntactically closes a block.
function isBlockFollow(token) {
  if (EOF === token.type) return true;
  if (Keyword !== token.type) return false;
  switch (token.value) {
    case 'else': case 'elseif':
    case 'end': case 'until':
      return true;
    default:
      return false;
  }
}

// Scope
// -----

// Store each block scope as an array of identifier names. Each scope is
// stored in a FILO-array.
var scopes
  // The current scope index
  , scopeDepth
  // A list of all global identifier nodes.
  , globals;

// Create a new scope inheriting all declarations from the previous scope.
function createScope() {
  // `Array.apply` spreads the parent's names into a fresh array, i.e. the
  // new scope starts out as a copy of its parent.
  scopes.push(Array.apply(null, scopes[scopeDepth++]));
}

// Exit and remove the current scope.
function exitScope() {
  scopes.pop();
  scopeDepth--;
}

// Add identifier name to the current scope if it doesn't already exist.
function scopeIdentifierName(name) {
  if (-1 !== indexOf(scopes[scopeDepth], name)) return;
  scopes[scopeDepth].push(name);
}

// Add identifier to the current scope and flag the node as local.
function scopeIdentifier(node) {
  scopeIdentifierName(node.name);
  attachScope(node, true);
}

// Attach scope information to node.
If the node is global, store it in the // globals array so we can return the information to the user. function attachScope(node, isLocal) { if (!isLocal && -1 === indexOfObject(globals, 'name', node.name)) globals.push(node); node.isLocal = isLocal; } // Is the identifier name available in this scope. function scopeHasName(name) { return (-1 !== indexOf(scopes[scopeDepth], name)); } // Location tracking // ----------------- // // Locations are stored in FILO-array as a `Marker` object consisting of both // `loc` and `range` data. Once a `Marker` is popped off the list an end // location is added and the data is attached to a syntax node. var locations = [] , trackLocations; function createLocationMarker() { return new Marker(token); } function Marker(token) { if (options.locations) { this.loc = { start: { line: token.line , column: token.range[0] - token.lineStart } , end: { line: 0 , column: 0 } }; } if (options.ranges) this.range = [token.range[0], 0]; } // Complete the location data stored in the `Marker` by adding the location // of the *previous token* as an end location. Marker.prototype.complete = function() { if (options.locations) { this.loc.end.line = previousToken.line; this.loc.end.column = previousToken.range[1] - previousToken.lineStart; } if (options.ranges) { this.range[1] = previousToken.range[1]; } }; // Create a new `Marker` and add it to the FILO-array. function markLocation() { if (trackLocations) locations.push(createLocationMarker()); } // Push an arbitrary `Marker` object onto the FILO-array. function pushLocation(marker) { if (trackLocations) locations.push(marker); } // Parse functions // --------------- // Chunk is the main program object. Syntactically it's the same as a block. // // chunk ::= block function parseChunk() { next(); markLocation(); var body = parseBlock(); if (EOF !== token.type) unexpected(token); // If the body is empty no previousToken exists when finishNode runs. 
if (trackLocations && !body.length) previousToken = token; return finishNode(ast.chunk(body)); } // A block contains a list of statements with an optional return statement // as its last statement. // // block ::= {stat} [retstat] function parseBlock(terminator) { var block = [] , statement; // Each block creates a new scope. if (options.scope) createScope(); while (!isBlockFollow(token)) { // Return has to be the last statement in a block. if ('return' === token.value) { block.push(parseStatement()); break; } statement = parseStatement(); // Statements are only added if they are returned, this allows us to // ignore some statements, such as EmptyStatement. if (statement) block.push(statement); } if (options.scope) exitScope(); // Doesn't really need an ast node return block; } // There are two types of statements, simple and compound. // // statement ::= break | goto | do | while | repeat | return // | if | for | function | local | label | assignment // | functioncall | ';' function parseStatement() { markLocation(); if (Keyword === token.type) { switch (token.value) { case 'local': next(); return parseLocalStatement(); case 'if': next(); return parseIfStatement(); case 'return': next(); return parseReturnStatement(); case 'function': next(); var name = parseFunctionName(); return parseFunctionDeclaration(name); case 'while': next(); return parseWhileStatement(); case 'for': next(); return parseForStatement(); case 'repeat': next(); return parseRepeatStatement(); case 'break': next(); return parseBreakStatement(); case 'do': next(); return parseDoStatement(); case 'goto': next(); return parseGotoStatement(); } } if (Punctuator === token.type) { if (consume('::')) return parseLabelStatement(); } // Assignments memorizes the location and pushes it manually for wrapper // nodes. Additionally empty `;` statements should not mark a location. if (trackLocations) locations.pop(); // When a `;` is encounted, simply eat it without storing it. 
if (consume(';')) return; return parseAssignmentOrCallStatement(); } // ## Statements // label ::= '::' Name '::' function parseLabelStatement() { var name = token.value , label = parseIdentifier(); if (options.scope) { scopeIdentifierName('::' + name + '::'); attachScope(label, true); } expect('::'); return finishNode(ast.labelStatement(label)); } // break ::= 'break' function parseBreakStatement() { return finishNode(ast.breakStatement()); } // goto ::= 'goto' Name function parseGotoStatement() { var name = token.value , label = parseIdentifier(); if (options.scope) label.isLabel = scopeHasName('::' + name + '::'); return finishNode(ast.gotoStatement(label)); } // do ::= 'do' block 'end' function parseDoStatement() { var body = parseBlock(); expect('end'); return finishNode(ast.doStatement(body)); } // while ::= 'while' exp 'do' block 'end' function parseWhileStatement() { var condition = parseExpectedExpression(); expect('do'); var body = parseBlock(); expect('end'); return finishNode(ast.whileStatement(condition, body)); } // repeat ::= 'repeat' block 'until' exp function parseRepeatStatement() { var body = parseBlock(); expect('until'); var condition = parseExpectedExpression(); return finishNode(ast.repeatStatement(condition, body)); } // retstat ::= 'return' [exp {',' exp}] [';'] function parseReturnStatement() { var expressions = []; if ('end' !== token.value) { var expression = parseExpression(); if (null != expression) expressions.push(expression); while (consume(',')) { expression = parseExpectedExpression(); expressions.push(expression); } consume(';'); // grammar tells us ; is optional here. } return finishNode(ast.returnStatement(expressions)); } // if ::= 'if' exp 'then' block {elif} ['else' block] 'end' // elif ::= 'elseif' exp 'then' block function parseIfStatement() { var clauses = [] , condition , body , marker; // IfClauses begin at the same location as the parent IfStatement. // It ends at the start of `end`, `else`, or `elseif`. 
if (trackLocations) { marker = locations[locations.length - 1]; locations.push(marker); } condition = parseExpectedExpression(); expect('then'); body = parseBlock(); clauses.push(finishNode(ast.ifClause(condition, body))); if (trackLocations) marker = createLocationMarker(); while (consume('elseif')) { pushLocation(marker); condition = parseExpectedExpression(); expect('then'); body = parseBlock(); clauses.push(finishNode(ast.elseifClause(condition, body))); if (trackLocations) marker = createLocationMarker(); } if (consume('else')) { // Include the `else` in the location of ElseClause. if (trackLocations) { marker = new Marker(previousToken); locations.push(marker); } body = parseBlock(); clauses.push(finishNode(ast.elseClause(body))); } expect('end'); return finishNode(ast.ifStatement(clauses)); } // There are two types of for statements, generic and numeric. // // for ::= Name '=' exp ',' exp [',' exp] 'do' block 'end' // for ::= namelist 'in' explist 'do' block 'end' // namelist ::= Name {',' Name} // explist ::= exp {',' exp} function parseForStatement() { var variable = parseIdentifier() , body; // The start-identifier is local. if (options.scope) scopeIdentifier(variable); // If the first expression is followed by a `=` punctuator, this is a // Numeric For Statement. if (consume('=')) { // Start expression var start = parseExpectedExpression(); expect(','); // End expression var end = parseExpectedExpression(); // Optional step expression var step = consume(',') ? parseExpectedExpression() : null; expect('do'); body = parseBlock(); expect('end'); return finishNode(ast.forNumericStatement(variable, start, end, step, body)); } // If not, it's a Generic For Statement else { // The namelist can contain one or more identifiers. var variables = [variable]; while (consume(',')) { variable = parseIdentifier(); // Each variable in the namelist is locally scoped. 
if (options.scope) scopeIdentifier(variable); variables.push(variable); } expect('in'); var iterators = []; // One or more expressions in the explist. do { var expression = parseExpectedExpression(); iterators.push(expression); } while (consume(',')); expect('do'); body = parseBlock(); expect('end'); return finishNode(ast.forGenericStatement(variables, iterators, body)); } } // Local statements can either be variable assignments or function // definitions. If a function definition is found, it will be delegated to // `parseFunctionDeclaration()` with the isLocal flag. // // This AST structure might change into a local assignment with a function // child. // // local ::= 'local' 'function' Name funcdecl // | 'local' Name {',' Name} ['=' exp {',' exp} function parseLocalStatement() { var name; if (Identifier === token.type) { var variables = [] , init = []; do { name = parseIdentifier(); variables.push(name); } while (consume(',')); if (consume('=')) { do { var expression = parseExpectedExpression(); init.push(expression); } while (consume(',')); } // Declarations doesn't exist before the statement has been evaluated. // Therefore assignments can't use their declarator. And the identifiers // shouldn't be added to the scope until the statement is complete. if (options.scope) { for (var i = 0, l = variables.length; i < l; i++) { scopeIdentifier(variables[i]); } } return finishNode(ast.localStatement(variables, init)); } if (consume('function')) { name = parseIdentifier(); if (options.scope) scopeIdentifier(name); // MemberExpressions are not allowed in local function statements. 
return parseFunctionDeclaration(name, true); } else { raiseUnexpectedToken('', token); } } // assignment ::= varlist '=' explist // varlist ::= prefixexp {',' prefixexp} // explist ::= exp {',' exp} // // call ::= callexp // callexp ::= prefixexp args | prefixexp ':' Name args function parseAssignmentOrCallStatement() { // Keep a reference to the previous token for better error messages in case // of invalid statement var previous = token , expression, marker; if (trackLocations) marker = createLocationMarker(); expression = parsePrefixExpression(); if (null == expression) return unexpected(token); if (',='.indexOf(token.value) >= 0) { var variables = [expression] , init = [] , exp; while (consume(',')) { exp = parsePrefixExpression(); if (null == exp) raiseUnexpectedToken('', token); variables.push(exp); } expect('='); do { exp = parseExpectedExpression(); init.push(exp); } while (consume(',')); pushLocation(marker); return finishNode(ast.assignmentStatement(variables, init)); } if (isCallExpression(expression)) { pushLocation(marker); return finishNode(ast.callStatement(expression)); } // The prefix expression was neither part of an assignment or a // callstatement, however as it was valid it's been consumed, so raise // the exception on the previous token to provide a helpful message. return unexpected(previous); } // ### Non-statements // Identifier ::= Name function parseIdentifier() { markLocation(); var identifier = token.value; if (Identifier !== token.type) raiseUnexpectedToken('', token); next(); return finishNode(ast.identifier(identifier)); } // Parse the functions parameters and body block. The name should already // have been parsed and passed to this declaration function. By separating // this we allow for anonymous functions in expressions. // // For local functions there's a boolean parameter which needs to be set // when parsing the declaration. // // funcdecl ::= '(' [parlist] ')' block 'end' // parlist ::= Name {',' Name} | [',' '...'] | '...' 
function parseFunctionDeclaration(name, isLocal) {
  // `name` is the already parsed function name node (null for anonymous
  // functions) and `isLocal` marks `local function` declarations. Returns
  // the node built by `ast.functionStatement`.
  var parameters = [];
  expect('(');

  // The declaration has arguments
  if (!consume(')')) {
    // Arguments are a comma separated list of identifiers, optionally ending
    // with a vararg.
    while (true) {
      if (Identifier === token.type) {
        var parameter = parseIdentifier();
        // Function parameters are local.
        if (options.scope) scopeIdentifier(parameter);

        parameters.push(parameter);

        if (consume(',')) continue;
      }
      // No arguments are allowed after a vararg.
      else if (VarargLiteral === token.type) {
        parameters.push(parsePrimaryExpression());
      } else {
        raiseUnexpectedToken('<name> or \'...\'', token);
      }
      // Whatever ended the list, only `)` may follow. Requiring it here
      // rejects parameter lists with a missing comma such as `(a b)`.
      expect(')');
      break;
    }
  }

  var body = parseBlock();
  expect('end');

  isLocal = isLocal || false;
  return finishNode(ast.functionStatement(name, parameters, isLocal, body));
}

// Parse the function name as identifiers and member expressions.
//
//     Name {'.' Name} [':' Name]

function parseFunctionName() {
  var base, name, marker;

  if (trackLocations) marker = createLocationMarker();
  base = parseIdentifier();

  if (options.scope) attachScope(base, false);

  while (consume('.')) {
    // Every wrapping member expression starts at the first identifier.
    pushLocation(marker);
    name = parseIdentifier();
    if (options.scope) attachScope(name, false);
    base = finishNode(ast.memberExpression(base, '.', name));
  }

  if (consume(':')) {
    pushLocation(marker);
    name = parseIdentifier();
    if (options.scope) attachScope(name, false);
    base = finishNode(ast.memberExpression(base, ':', name));
  }

  return base;
}

//     tableconstructor ::= '{' [fieldlist] '}'
//     fieldlist ::= field {fieldsep field} [fieldsep]
//     field ::= '[' exp ']' '=' exp | Name '=' exp | exp
//
//     fieldsep ::= ',' | ';'
//
// Expects the opening `{` to have been consumed by the caller.

function parseTableConstructor() {
  var fields = []
    , key, value;

  while (true) {
    markLocation();
    if (Punctuator === token.type && consume('[')) {
      key = parseExpectedExpression();
      expect(']');
      expect('=');
      value = parseExpectedExpression();
      fields.push(finishNode(ast.tableKey(key, value)));
    } else if (Identifier === token.type) {
      key = parseExpectedExpression();

      if (consume('=')) {
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableKeyString(key, value)));
      } else {
        fields.push(finishNode(ast.tableValue(key)));
      }
    } else {
      if (null == (value = parseExpression())) {
        // No field was parsed, so discard the location marked above.
        locations.pop();
        break;
      }
      fields.push(finishNode(ast.tableValue(value)));
    }
    // Another field may only follow a real separator. String literals are
    // excluded because their value can also be "," or ";" (or the empty
    // string, which every `indexOf` lookup would match).
    if (StringLiteral !== token.type &&
        (',' === token.value || ';' === token.value)) {
      next();
      continue;
    }
    // Anything else must be the closing brace; `expect` reports it if not,
    // so fields without a separator (`{1 2}`) are rejected.
    break;
  }
  expect('}');
  return finishNode(ast.tableConstructorExpression(fields));
}

// Expression parser
// -----------------
//
// Expressions are evaluated and always return a value. If nothing is
// matched null will be returned.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
//     primary ::= nil | false | true | Number | String | '...'
//          | functiondef | tableconstructor
//
//     prefixexp ::= (Name | '(' exp ')' ) { '[' exp ']'
//          | '.' Name | ':' Name args | args }
//

function parseExpression() {
  var expression = parseSubExpression(0);
  return expression;
}

// Parse an expression expecting it to be valid.

function parseExpectedExpression() {
  var expression = parseExpression();
  if (null == expression) raiseUnexpectedToken('<expression>', token);
  else return expression;
}

// Return the precedence priority of the operator.
//
// As unary `-` can't be distinguished from binary `-`, unary precedence
// isn't described in this table but in `parseSubExpression()` itself.
//
// As this function gets hit on every expression it's been optimized due to
// the expensive CompareICStub which took ~8% of the parse time.
//
// Only the first character code and the length are inspected, except for
// `and` which is compared in full. Returns 0 for anything that is not a
// binary operator.

function binaryPrecedence(operator) {
  var charCode = operator.charCodeAt(0)
    , length = operator.length;

  if (1 === length) {
    switch (charCode) {
      case 94: return 10; // ^
      case 42: case 47: case 37: return 7; // * / %
      case 43: case 45: return 6; // + -
      case 60: case 62: return 3; // < >
      case 38: case 124: return 7; // & |
    }
  } else if (2 === length) {
    switch (charCode) {
      case 46: return 5; // ..
      case 60: case 62: case 61: case 126: return 3; // <= >= == ~=
      case 111: return 1; // or
    }
  } else if (97 === charCode && 'and' === operator) return 2;
  return 0;
}

// Implement an operator-precedence parser to handle binary operator
// precedence.
//
// We use this algorithm because it's compact, it's fast and Lua core uses
// the same so we can be sure our expressions are parsed in the same manner
// without excessive amounts of tests.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
// Only operators binding tighter than `minPrecedence` are consumed. Returns
// null when no expression starts at the current token.

function parseSubExpression(minPrecedence) {
  var operator = token.value
  // The left-hand side in binary operations.
    , expression, marker;

  if (trackLocations) marker = createLocationMarker();

  // UnaryExpression
  if (isUnary(token)) {
    markLocation();
    next();
    var argument = parseSubExpression(8);
    if (argument == null) raiseUnexpectedToken('<expression>', token);
    expression = finishNode(ast.unaryExpression(operator, argument));
  }
  if (null == expression) {
    // PrimaryExpression
    expression = parsePrimaryExpression();

    // PrefixExpression
    if (null == expression) {
      expression = parsePrefixExpression();
    }
  }
  // This is not a valid left hand expression.
  if (null == expression) return null;

  var precedence;
  while (true) {
    operator = token.value;

    precedence = (Punctuator === token.type || Keyword === token.type) ?
      binaryPrecedence(operator) : 0;

    if (precedence === 0 || precedence <= minPrecedence) break;
    // Right-hand precedence operators
    if ('^' === operator || '..' === operator) precedence--;
    next();
    var right = parseSubExpression(precedence);
    if (null == right) raiseUnexpectedToken('<expression>', token);
    // Push in the marker created before the loop to wrap its entirety.
    if (trackLocations) locations.push(marker);
    expression = finishNode(ast.binaryExpression(operator, expression, right));
  }
  return expression;
}

//     prefixexp ::= prefix {suffix}
//     prefix ::= Name | '(' exp ')'
//     suffix ::= '[' exp ']' | '.'
//         Name | ':' Name args | args
//
//     args ::= '(' [explist] ')' | tableconstructor | String
//
// Returns the parsed prefix expression wrapped in any index, member and call
// suffixes that follow it, or null when the current token cannot start a
// prefix expression.

function parsePrefixExpression() {
  var base, name, marker
  // Keep track of the scope, if a parent is local so are the children.
    , isLocal;

  if (trackLocations) marker = createLocationMarker();

  // The prefix
  if (Identifier === token.type) {
    name = token.value;
    base = parseIdentifier();
    // Set the parent scope.
    if (options.scope) attachScope(base, isLocal = scopeHasName(name));
  }
  // Only a punctuator may open a parenthesised prefix; a string literal
  // whose content is "(" has the same token value but is not a parenthesis.
  else if (Punctuator === token.type && consume('(')) {
    base = parseExpectedExpression();
    expect(')');
    if (options.scope) isLocal = base.isLocal;
  } else {
    return null;
  }

  // The suffix
  var expression, identifier;

  while (true) {
    if (Punctuator === token.type) {
      switch (token.value) {
        case '[':
          pushLocation(marker);
          next();
          expression = parseExpectedExpression();
          // Consume the closing bracket before finishing the node, the same
          // order call expressions use for their closing parenthesis.
          expect(']');
          base = finishNode(ast.indexExpression(base, expression));
          break;
        case '.':
          pushLocation(marker);
          next();
          identifier = parseIdentifier();
          // Inherit the scope
          if (options.scope) attachScope(identifier, isLocal);
          base = finishNode(ast.memberExpression(base, '.', identifier));
          break;
        case ':':
          pushLocation(marker);
          next();
          identifier = parseIdentifier();
          if (options.scope) attachScope(identifier, isLocal);
          base = finishNode(ast.memberExpression(base, ':', identifier));
          // Once a : is found, this has to be a CallExpression, otherwise
          // throw an error.
          pushLocation(marker);
          base = parseCallExpression(base);
          break;
        case '(': case '{': // args
          pushLocation(marker);
          base = parseCallExpression(base);
          break;
        default:
          return base;
      }
    } else if (StringLiteral === token.type) {
      pushLocation(marker);
      base = parseCallExpression(base);
    } else {
      break;
    }
  }

  return base;
}

//     args ::= '(' [explist] ')' | tableconstructor | String
//
// Wraps `base` in the call node matching the argument form at the current
// token; raises when the current token cannot start function arguments.

function parseCallExpression(base) {
  if (Punctuator === token.type) {
    switch (token.value) {
      case '(':
        next();

        // List of expressions
        var expressions = [];
        var expression = parseExpression();
        if (null != expression) expressions.push(expression);
        while (consume(',')) {
          expression = parseExpectedExpression();
          expressions.push(expression);
        }

        expect(')');
        return finishNode(ast.callExpression(base, expressions));

      case '{':
        // The table argument gets its own location, starting at the brace.
        markLocation();
        next();
        var table = parseTableConstructor();
        return finishNode(ast.tableCallExpression(base, table));
    }
  } else if (StringLiteral === token.type) {
    return finishNode(ast.stringCallExpression(base, parsePrimaryExpression()));
  }

  raiseUnexpectedToken('function arguments', token);
}

//     primary ::= String | Numeric | nil | true | false
//          | functiondef | tableconstructor | '...'
//
// Returns the parsed node, or undefined when the current token does not
// start a primary expression.

function parsePrimaryExpression() {
  var literals = StringLiteral | NumericLiteral | BooleanLiteral | NilLiteral | VarargLiteral
    , value = token.value
    , type = token.type
    , marker;

  if (trackLocations) marker = createLocationMarker();

  // Token types are bit flags, so one mask test covers every literal kind.
  if (type & literals) {
    pushLocation(marker);
    // `raw` is the literal exactly as written in the source text.
    var raw = input.slice(token.range[0], token.range[1]);
    next();
    return finishNode(ast.literal(type, value, raw));
  } else if (Keyword === type && 'function' === value) {
    pushLocation(marker);
    next();
    return parseFunctionDeclaration(null);
  } else if (consume('{')) {
    pushLocation(marker);
    return parseTableConstructor();
  }
}

// Parser
// ------

// Export the main parser.
//
//   - `wait` Hold parsing until end() is called. Defaults to false
//   - `comments` Store comments. Defaults to true.
//   - `scope` Track identifier scope.
Defaults to false. // // Example: // // var parser = require('luaparser'); // parser.parse('i = 0'); exports.parse = parse; function parse(_input, _options) { if ('undefined' === typeof _options && 'object' === typeof _input) { _options = _input; _input = undefined; } if (!_options) _options = {}; input = _input || ''; options = extend(defaultOptions, _options); // Rewind the lexer index = 0; line = 1; lineStart = 0; length = input.length; // When tracking identifier scope, initialize with an empty scope. scopes = [[]]; scopeDepth = 0; globals = []; locations = []; if (options.comments) comments = []; if (!options.wait) return end(); return exports; } // Write to the source code buffer without beginning the parse. exports.write = write; function write(_input) { input += String(_input); length = input.length; return exports; } // Send an EOF and begin parsing. exports.end = end; function end(_input) { if ('undefined' !== typeof _input) write(_input); length = input.length; trackLocations = options.locations || options.ranges; // Initialize with a lookahead token. lookahead = lex(); var chunk = parseChunk(); if (options.comments) chunk.comments = comments; if (options.scope) chunk.globals = globals; if (locations.length > 0) throw new Error('Location tracking failed. This is most likely a bug in luaparse'); return chunk; } })); /* vim: set sw=2 ts=2 et tw=79 : */ });