/***** regexp tokenizer */
require("amd-loader");

// can't use ../lib/ace/tokenizer because of its max token count limit:
// var Tokenizer = require("../lib/ace/tokenizer").Tokenizer;
var Tokenizer = require("../lib/ace/tokenizer_dev").Tokenizer;
var TextHighlightRules = require("../lib/ace/mode/text_highlight_rules").TextHighlightRules;

var r = new TextHighlightRules();
r.$rules = {
    start: [
        {token: "anchor", regex: /[\^\$]|\\[bBAZzG]/, merge: false},
        {token: "backRef", regex: /\\([1-9]|k(<\w+\b[+-]?\d>|'\w+\b[+-]?\d'))/, merge: false},
        {include: "charTypes", merge: false},
        {token: "charclass", regex: /\[\^?/, push: "charclass", merge: false},
        {token: "alternation", regex: /\|/, merge: false},
        {include: "quantifiers", merge: false},
        {include: "groups", merge: false},
        {include: "xGroup", merge: true}
    ],
    charTypes: [
        // TODO: also handle \cx control-character escapes
        {token: "char", regex: /\\([tvnrbfae]|[0-8]{1,3}|x[\dA-Fa-f]{2}|x7[\dA-Fa-f]{7})/, merge: false},
        {token: "charType", regex: /\.|\\[wWsSdDhH]/, merge: false},
        {token: "charProperty", regex: /\\p{\w+}/, merge: false},
        {token: "char", regex: /\\./, merge: false}
    ],
    quantifiers: [
        {token: "quantifier", regex: /([?*+]|{\d+\b,?\d*}|{,\d+})[?+]?/, merge: false}
    ],
    charclass: [
        {include: "charTypes", merge: false},
        {token: "charclass.start", regex: /\[\^?/, push: "charclass", merge: false},
        {token: "charclass.end", regex: /\]/, next: "pop", merge: false}
    ],
    groups: [
        {token: "group", regex: /[(]([?](#|[imx\-]+:?|:|=|!|<=|<!|>|<\w+>|'\w+'|))?|[)]/,
            onMatch: function(val, state, stack) {
                if (!stack.groupNumber)
                    stack.groupNumber = 1;

                var isStart = val !== ")";
                var t = {depth: 0, type: isStart ? "group.start" : "group.end", value: val};
                // the character after "(?", e.g. ":", "<", "'", "=", "!", or a flag
                t.groupType = val[2];

                if (val == "(") {
                    // plain capturing group
                    t.number = stack.groupNumber++;
                    t.isGroup = true;
                } else if (t.groupType == "'" || (t.groupType == "<" && val.slice(-1) == ">")) {
                    // named group: (?<name>...) or (?'name'...)
                    t.name = val.slice(2, -1);
                    t.isGroup = true;
                } else if (t.groupType == ":") {
                    t.isGroup = true;
                }

                // track the innermost group that switches the x (free-spacing) flag;
                // a "-" before the x means the flag is being cleared
                if (t.groupType && val.indexOf("x") != -1) {
                    var minus = val.indexOf("-");
                    if (minus == -1 || minus > val.indexOf("x"))
                        stack.xGroup = t;
                    else
                        stack.xGroup = null;
                } else if (!isStart && stack.xGroup && stack.xGroup == stack[0]) {
                    // closing the x-flag group turns x mode off again, but only
                    // if the flags were scoped to the group, i.e. "(?x:" not "(?x)"
                    if (stack.xGroup.value.slice(-1) == ":")
                        stack.xGroup = null;
                }

                if (isStart) {
                    if (stack.groupDepth) {
                        stack[0].hasChildren = true;
                    }
                    stack.groupDepth = (stack.groupDepth || 0) + 1;
                    stack.unshift(t);
                } else {
                    stack.groupDepth--;
                    // link matching start and end tokens; guard against an unbalanced ")"
                    t.start = stack.shift();
                    if (t.start)
                        t.start.end = t;
                }
                return [t];
            }, merge: false
        }
    ],
    xGroup: [
        // in x (free-spacing) mode, whitespace is insignificant and dropped
        {token: "text", regex: /\s+/, onMatch: function(val, state, stack) {
            return stack.xGroup ? [] : "text";
        }, merge: true},
        // in x mode, "#" starts a comment that runs to the end of the line
        {token: "text", regex: /#/, onMatch: function(val, state, stack) {
            if (stack.xGroup) {
                this.next = "comment";
                stack.unshift(state);
                return [];
            }
            this.next = "";
            return "text";
        }, merge: true}
    ],
    comment: [{
        // consume the rest of the line, then return to the saved state
        regex: /[^\n\r]*|^/, token: "", onMatch: function(val, state, stack) {
            this.next = stack.shift();
            return [];
        }
    }]
};
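
// Note: "push"/"next: pop" drive nested character classes, while the groups
// rule keeps its own bookkeeping on the tokenizer stack: group numbers and
// depth, start/end token links, and the innermost x-flag group that the
// xGroup and comment states consult for free-spacing mode.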

// resolve include references and push/pop shorthands into concrete states
r.normalizeRules();
var tmReTokenizer = new Tokenizer(r.getRules());

// tokenize a single line of regexp source and return its token array
function tokenize(str) {
    return tmReTokenizer.getLineTokens(str).tokens;
}

// concatenate token values into a single string
function toStr(tokens) {
    return tokens.map(function(x) { return x.value; }).join("");
}

exports.tokenize = tokenize;
exports.toStr = toStr;
exports.tmReTokenizer = tmReTokenizer;
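
// Illustrative smoke test (the sample pattern is arbitrary); it runs only when
// this file is executed directly, not when it is required as a module.
if (require.main === module) {
    var src = "(?<year>\\d{4})-\\d{2}";
    tokenize(src).forEach(function(t) {
        console.log(t.type + " -> " + JSON.stringify(t.value));
    });
    // outside of x mode every character lands in some token, so the values
    // should concatenate back to the input line
    console.log("round-trip:", toStr(tokenize(src)) === src);
}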