/* ***** BEGIN LICENSE BLOCK *****
 * Distributed under the BSD license:
 *
 * Copyright (c) 2010, Ajax.org B.V.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Ajax.org B.V. nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ***** END LICENSE BLOCK ***** */

define(function(require, exports, module) {
"use strict";

var config = require("./config");

// Tokenizing lines longer than this makes the editor very slow.
var MAX_TOKEN_COUNT = 2000;

/**
 * This class takes a set of highlighting rules and creates a tokenizer out of them.
 * For more information, see [the wiki on extending highlighters](https://github.com/ajaxorg/ace/wiki/Creating-or-Extending-an-Edit-Mode#wiki-extendingTheHighlighter).
 * @class Tokenizer
 **/

/**
 * Constructs a new tokenizer based on the given highlighting rules.
 * @param {Object} rules The highlighting rules, a map of state names to rule arrays
 *
 * @constructor
 **/
var Tokenizer = function(rules) {
    this.states = rules;

    this.regExps = {};
    this.matchMappings = {};
    for (var key in this.states) {
        var state = this.states[key];
        var ruleRegExps = [];
        var matchTotal = 0;
        var mapping = this.matchMappings[key] = {defaultToken: "text"};
        var flag = "g";

        var splitterRules = [];
        for (var i = 0; i < state.length; i++) {
            var rule = state[i];
            if (rule.defaultToken)
                mapping.defaultToken = rule.defaultToken;
            if (rule.caseInsensitive)
                flag = "gi";
            if (rule.regex == null)
                continue;

            if (rule.regex instanceof RegExp)
                rule.regex = rule.regex.toString().slice(1, -1);

            // Count the number of matching groups. Two extra groups come from the
            // full match and the catch-all at the end (used to force a match).
            var adjustedregex = rule.regex;
            var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2;
            if (Array.isArray(rule.token)) {
                if (rule.token.length == 1 || matchcount == 1) {
                    rule.token = rule.token[0];
                } else if (matchcount - 1 != rule.token.length) {
                    this.reportError("number of classes and regexp groups doesn't match", {
                        rule: rule,
                        groupCount: matchcount - 1
                    });
                    rule.token = rule.token[0];
                } else {
                    rule.tokenArray = rule.token;
                    rule.token = null;
                    rule.onMatch = this.$arrayTokens;
                }
            } else if (typeof rule.token == "function" && !rule.onMatch) {
                if (matchcount > 1)
                    rule.onMatch = this.$applyToken;
                else
                    rule.onMatch = rule.token;
            }

            if (matchcount > 1) {
                if (/\\\d/.test(rule.regex)) {
                    // Replace any backreferences and offset them appropriately.
                    adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function(match, digit) {
                        return "\\" + (parseInt(digit, 10) + matchTotal + 1);
                    });
                } else {
                    matchcount = 1;
                    adjustedregex = this.removeCapturingGroups(rule.regex);
                }
                if (!rule.splitRegex && typeof rule.token != "string")
                    splitterRules.push(rule); // flag will be known only at the very end
            }

            mapping[matchTotal] = i;
            matchTotal += matchcount;

            ruleRegExps.push(adjustedregex);

            // makes property access faster
            if (!rule.onMatch)
                rule.onMatch = null;
        }

        if (!ruleRegExps.length) {
            mapping[0] = 0;
            ruleRegExps.push("$");
        }

        splitterRules.forEach(function(rule) {
            rule.splitRegex = this.createSplitterRegexp(rule.regex, flag);
        }, this);

        this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag);
    }
};
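
// Usage sketch (illustrative, not part of the original source): the rule set below is
// a made-up example, but its shape matches what the constructor above expects — a map
// of state names to rule arrays, each rule carrying `token`, `regex` and optionally
// `next`/`defaultToken`.
//
//     var tokenizer = new Tokenizer({
//         start: [
//             {token: "comment", regex: "//.*$"},
//             {token: "string", regex: '"', next: "qqstring"},
//             {defaultToken: "text"}
//         ],
//         qqstring: [
//             {token: "string", regex: '"', next: "start"},
//             {defaultToken: "string"}
//         ]
//     });
//     var result = tokenizer.getLineTokens('x = "hi" // done', "start");
//     // result.tokens -> e.g. [{type: "text", value: "x = "}, {type: "string", value: '"hi"'}, ...]
//     // result.state  -> the state to pass as `startState` for the following line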

(function() {
    this.$setMaxTokenCount = function(m) {
        MAX_TOKEN_COUNT = m | 0;
    };

    // Applies a rule's `token` function to a multi-group match: the matched text is
    // re-split with `splitRegex` and the function's return value(s) are paired with
    // the captured group values.
    this.$applyToken = function(str) {
        var values = this.splitRegex.exec(str).slice(1);
        var types = this.token.apply(this, values);

        // required for compatibility with old modes
        if (typeof types === "string")
            return [{type: types, value: str}];

        var tokens = [];
        for (var i = 0, l = types.length; i < l; i++) {
            if (values[i])
                tokens[tokens.length] = {
                    type: types[i],
                    value: values[i]
                };
        }
        return tokens;
    };

    // Pairs each capturing group of `splitRegex` with the corresponding entry of
    // `tokenArray` (used when a rule declares `token` as an array).
    this.$arrayTokens = function(str) {
        if (!str)
            return [];
        var values = this.splitRegex.exec(str);
        if (!values)
            return "text";
        var tokens = [];
        var types = this.tokenArray;
        for (var i = 0, l = types.length; i < l; i++) {
            if (values[i + 1])
                tokens[tokens.length] = {
                    type: types[i],
                    value: values[i + 1]
                };
        }
        return tokens;
    };

    // Turns capturing groups into non-capturing ones so that a rule's regex can be
    // embedded in the combined per-state regexp without shifting group indices.
    this.removeCapturingGroups = function(src) {
        var r = src.replace(
            /\\.|\[(?:\\.|[^\\\]])*|\(\?[:=!]|(\()/g,
            function(x, y) { return y ? "(?:" : x; }
        );
        return r;
    };
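
    // Illustrative behaviour of removeCapturingGroups (assumed examples, not from the
    // original source): only plain capturing groups are rewritten; escapes, character
    // classes, non-capturing groups and lookaheads pass through unchanged.
    //
    //     tokenizer.removeCapturingGroups("(ab)|(cd)");   // -> "(?:ab)|(?:cd)"
    //     tokenizer.removeCapturingGroups("\\(|[()]");    // -> "\\(|[()]"   (unchanged)
    //     tokenizer.removeCapturingGroups("(?:x)(?=y)");  // -> "(?:x)(?=y)" (unchanged)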

    // Builds the anchored regexp used by $applyToken/$arrayTokens to re-split a matched
    // value into its groups. A trailing lookahead group is stripped, since the lookahead
    // text is not part of the matched value being re-split.
    this.createSplitterRegexp = function(src, flag) {
        if (src.indexOf("(?=") != -1) {
            var stack = 0;
            var inChClass = false;
            var lastCapture = {};
            src.replace(/(\\.)|(\((?:\?[=!])?)|(\))|([\[\]])/g, function(
                m, esc, parenOpen, parenClose, square, index
            ) {
                if (inChClass) {
                    inChClass = square != "]";
                } else if (square) {
                    inChClass = true;
                } else if (parenClose) {
                    if (stack == lastCapture.stack) {
                        lastCapture.end = index + 1;
                        lastCapture.stack = -1;
                    }
                    stack--;
                } else if (parenOpen) {
                    stack++;
                    if (parenOpen.length != 1) {
                        lastCapture.stack = stack;
                        lastCapture.start = index;
                    }
                }
                return m;
            });

            if (lastCapture.end != null && /^\)*$/.test(src.substr(lastCapture.end)))
                src = src.substring(0, lastCapture.start) + src.substr(lastCapture.end);
        }

        // this is needed for regexps that can match in multiple ways
        if (src.charAt(0) != "^") src = "^" + src;
        if (src.charAt(src.length - 1) != "$") src += "$";

        return new RegExp(src, (flag || "").replace("g", ""));
    };

    /**
     * Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state.
     * @param {String} line The line of text to tokenize
     * @param {String|String[]} startState The state (or state stack) returned for the previous line
     * @returns {Object}
     **/
    this.getLineTokens = function(line, startState) {
        if (startState && typeof startState != "string") {
            var stack = startState.slice(0);
            startState = stack[0];
            if (startState === "#tmp") {
                stack.shift();
                startState = stack.shift();
            }
        } else
            var stack = [];

        var currentState = startState || "start";
        var state = this.states[currentState];
        if (!state) {
            currentState = "start";
            state = this.states[currentState];
        }
        var mapping = this.matchMappings[currentState];
        var re = this.regExps[currentState];
        re.lastIndex = 0;

        var match, tokens = [];
        var lastIndex = 0;
        var matchAttempts = 0;

        var token = {type: null, value: ""};

        while (match = re.exec(line)) {
            var type = mapping.defaultToken;
            var rule = null;
            var value = match[0];
            var index = re.lastIndex;

            if (index - value.length > lastIndex) {
                var skipped = line.substring(lastIndex, index - value.length);
                if (token.type == type) {
                    token.value += skipped;
                } else {
                    if (token.type)
                        tokens.push(token);
                    token = {type: type, value: skipped};
                }
            }

            for (var i = 0; i < match.length - 2; i++) {
                if (match[i + 1] === undefined)
                    continue;

                rule = state[mapping[i]];

                if (rule.onMatch)
                    type = rule.onMatch(value, currentState, stack, line);
                else
                    type = rule.token;

                if (rule.next) {
                    if (typeof rule.next == "string") {
                        currentState = rule.next;
                    } else {
                        currentState = rule.next(currentState, stack);
                    }

                    state = this.states[currentState];
                    if (!state) {
                        this.reportError("state doesn't exist", currentState);
                        currentState = "start";
                        state = this.states[currentState];
                    }
                    mapping = this.matchMappings[currentState];
                    lastIndex = index;
                    re = this.regExps[currentState];
                    re.lastIndex = index;
                }
                if (rule.consumeLineEnd)
                    lastIndex = index;
                break;
            }

            if (value) {
                if (typeof type === "string") {
                    if ((!rule || rule.merge !== false) && token.type === type) {
                        token.value += value;
                    } else {
                        if (token.type)
                            tokens.push(token);
                        token = {type: type, value: value};
                    }
                } else if (type) {
                    if (token.type)
                        tokens.push(token);
                    token = {type: null, value: ""};
                    for (var i = 0; i < type.length; i++)
                        tokens.push(type[i]);
                }
            }

            if (lastIndex == line.length)
                break;

            lastIndex = index;

            if (matchAttempts++ > MAX_TOKEN_COUNT) {
                if (matchAttempts > 2 * line.length) {
                    this.reportError("infinite loop within ace tokenizer", {
                        startState: startState,
                        line: line
                    });
                }
                // Chrome doesn't show the contents of text nodes with very long text,
                // so split the rest of the line into "overflow" tokens.
                while (lastIndex < line.length) {
                    if (token.type)
                        tokens.push(token);
                    token = {
                        value: line.substring(lastIndex, lastIndex += 2000),
                        type: "overflow"
                    };
                }
                currentState = "start";
                stack = [];
                break;
            }
        }

        if (token.type)
            tokens.push(token);

        if (stack.length > 1) {
            if (stack[0] !== currentState)
                stack.unshift("#tmp", currentState);
        }
        return {
            tokens : tokens,
            state : stack.length ? stack : currentState
        };
    };
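
    // Line-by-line tokenization sketch (illustrative, not part of the original source):
    // the `state` returned for one line is fed back in as `startState` for the next, so
    // multi-line constructs such as block comments or strings survive line breaks.
    // Assumes `tokenizer` was built with rules that define the relevant states.
    //
    //     var state = "start";
    //     var lines = ["/* a block", "comment */ done"];
    //     lines.forEach(function(line) {
    //         var data = tokenizer.getLineTokens(line, state);
    //         // data.tokens -> [{type: ..., value: ...}, ...]
    //         state = data.state; // a string, or an array when a state stack is active
    //     });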

    this.reportError = config.reportError;

}).call(Tokenizer.prototype);

exports.Tokenizer = Tokenizer;
});