/* ***** BEGIN LICENSE BLOCK ***** * Distributed under the BSD license: * * Copyright (c) 2010, Ajax.org B.V. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Ajax.org B.V. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ***** END LICENSE BLOCK ***** */ define(function(require, exports, module) { "use strict"; var config = require("./config"); // tokenizing lines longer than this makes editor very slow var MAX_TOKEN_COUNT = 2000; /** * This class takes a set of highlighting rules, and creates a tokenizer out of them. For more information, see [the wiki on extending highlighters](https://github.com/ajaxorg/ace/wiki/Creating-or-Extending-an-Edit-Mode#wiki-extendingTheHighlighter). * @class Tokenizer **/ /** * Constructs a new tokenizer based on the given rules and flags. * @param {Object} rules The highlighting rules * * @constructor **/ var Tokenizer = function(rules) { this.states = rules; this.regExps = {}; this.matchMappings = {}; for (var key in this.states) { var state = this.states[key]; var ruleRegExps = []; var matchTotal = 0; var mapping = this.matchMappings[key] = {defaultToken: "text"}; var flag = "g"; var splitterRurles = []; for (var i = 0; i < state.length; i++) { var rule = state[i]; if (rule.defaultToken) mapping.defaultToken = rule.defaultToken; if (rule.caseInsensitive) flag = "gi"; if (rule.regex == null) continue; if (rule.regex instanceof RegExp) rule.regex = rule.regex.toString().slice(1, -1); // Count number of matching groups. 2 extra groups from the full match // And the catch-all on the end (used to force a match); var adjustedregex = rule.regex; var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2; if (Array.isArray(rule.token)) { if (rule.token.length == 1 || matchcount == 1) { rule.token = rule.token[0]; } else if (matchcount - 1 != rule.token.length) { this.reportError("number of classes and regexp groups doesn't match", { rule: rule, groupCount: matchcount - 1 }); rule.token = rule.token[0]; } else { rule.tokenArray = rule.token; rule.token = null; rule.onMatch = this.$arrayTokens; } } else if (typeof rule.token == "function" && !rule.onMatch) { if (matchcount > 1) rule.onMatch = this.$applyToken; else rule.onMatch = rule.token; } if (matchcount > 1) { if (/\\\d/.test(rule.regex)) { // Replace any backreferences and offset appropriately. adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function(match, digit) { return "\\" + (parseInt(digit, 10) + matchTotal + 1); }); } else { matchcount = 1; adjustedregex = this.removeCapturingGroups(rule.regex); } if (!rule.splitRegex && typeof rule.token != "string") splitterRurles.push(rule); // flag will be known only at the very end } mapping[matchTotal] = i; matchTotal += matchcount; ruleRegExps.push(adjustedregex); // makes property access faster if (!rule.onMatch) rule.onMatch = null; } if (!ruleRegExps.length) { mapping[0] = 0; ruleRegExps.push("$"); } splitterRurles.forEach(function(rule) { rule.splitRegex = this.createSplitterRegexp(rule.regex, flag); }, this); this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag); } }; (function() { this.$setMaxTokenCount = function(m) { MAX_TOKEN_COUNT = m | 0; }; this.$applyToken = function(str) { var values = this.splitRegex.exec(str).slice(1); var types = this.token.apply(this, values); // required for compatibility with old modes if (typeof types === "string") return [{type: types, value: str}]; var tokens = []; for (var i = 0, l = types.length; i < l; i++) { if (values[i]) tokens[tokens.length] = { type: types[i], value: values[i] }; } return tokens; }; this.$arrayTokens = function(str) { if (!str) return []; var values = this.splitRegex.exec(str); if (!values) return "text"; var tokens = []; var types = this.tokenArray; for (var i = 0, l = types.length; i < l; i++) { if (values[i + 1]) tokens[tokens.length] = { type: types[i], value: values[i + 1] }; } return tokens; }; this.removeCapturingGroups = function(src) { var r = src.replace( /\\.|\[(?:\\.|[^\\\]])*|\(\?[:=!]|(\()/g, function(x, y) {return y ? "(?:" : x;} ); return r; }; this.createSplitterRegexp = function(src, flag) { if (src.indexOf("(?=") != -1) { var stack = 0; var inChClass = false; var lastCapture = {}; src.replace(/(\\.)|(\((?:\?[=!])?)|(\))|([\[\]])/g, function( m, esc, parenOpen, parenClose, square, index ) { if (inChClass) { inChClass = square != "]"; } else if (square) { inChClass = true; } else if (parenClose) { if (stack == lastCapture.stack) { lastCapture.end = index+1; lastCapture.stack = -1; } stack--; } else if (parenOpen) { stack++; if (parenOpen.length != 1) { lastCapture.stack = stack; lastCapture.start = index; } } return m; }); if (lastCapture.end != null && /^\)*$/.test(src.substr(lastCapture.end))) src = src.substring(0, lastCapture.start) + src.substr(lastCapture.end); } // this is needed for regexps that can match in multiple ways if (src.charAt(0) != "^") src = "^" + src; if (src.charAt(src.length - 1) != "$") src += "$"; return new RegExp(src, (flag||"").replace("g", "")); }; /** * Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state. * @returns {Object} **/ this.getLineTokens = function(line, startState) { if (startState && typeof startState != "string") { var stack = startState.slice(0); startState = stack[0]; if (startState === "#tmp") { stack.shift(); startState = stack.shift(); } } else var stack = []; var currentState = startState || "start"; var state = this.states[currentState]; if (!state) { currentState = "start"; state = this.states[currentState]; } var mapping = this.matchMappings[currentState]; var re = this.regExps[currentState]; re.lastIndex = 0; var match, tokens = []; var lastIndex = 0; var matchAttempts = 0; var token = {type: null, value: ""}; while (match = re.exec(line)) { var type = mapping.defaultToken; var rule = null; var value = match[0]; var index = re.lastIndex; if (index - value.length > lastIndex) { var skipped = line.substring(lastIndex, index - value.length); if (token.type == type) { token.value += skipped; } else { if (token.type) tokens.push(token); token = {type: type, value: skipped}; } } for (var i = 0; i < match.length-2; i++) { if (match[i + 1] === undefined) continue; rule = state[mapping[i]]; if (rule.onMatch) type = rule.onMatch(value, currentState, stack, line); else type = rule.token; if (rule.next) { if (typeof rule.next == "string") { currentState = rule.next; } else { currentState = rule.next(currentState, stack); } state = this.states[currentState]; if (!state) { this.reportError("state doesn't exist", currentState); currentState = "start"; state = this.states[currentState]; } mapping = this.matchMappings[currentState]; lastIndex = index; re = this.regExps[currentState]; re.lastIndex = index; } if (rule.consumeLineEnd) lastIndex = index; break; } if (value) { if (typeof type === "string") { if ((!rule || rule.merge !== false) && token.type === type) { token.value += value; } else { if (token.type) tokens.push(token); token = {type: type, value: value}; } } else if (type) { if (token.type) tokens.push(token); token = {type: null, value: ""}; for (var i = 0; i < type.length; i++) tokens.push(type[i]); } } if (lastIndex == line.length) break; lastIndex = index; if (matchAttempts++ > MAX_TOKEN_COUNT) { if (matchAttempts > 2 * line.length) { this.reportError("infinite loop with in ace tokenizer", { startState: startState, line: line }); } // chrome doens't show contents of text nodes with very long text while (lastIndex < line.length) { if (token.type) tokens.push(token); token = { value: line.substring(lastIndex, lastIndex += 2000), type: "overflow" }; } currentState = "start"; stack = []; break; } } if (token.type) tokens.push(token); if (stack.length > 1) { if (stack[0] !== currentState) stack.unshift("#tmp", currentState); } return { tokens : tokens, state : stack.length ? stack : currentState }; }; this.reportError = config.reportError; }).call(Tokenizer.prototype); exports.Tokenizer = Tokenizer; });