/***** regexp tokenizer: lexes Oniguruma-flavored regular expression source into typed tokens using Ace highlight rules */
require("amd-loader");
// The standard Tokenizer caps output at MAX_TOKEN_COUNT, which would truncate
// long patterns, so the dev build without that limit is used instead.
var Tokenizer = require("../lib/ace/tokenizer_dev").Tokenizer;
var TextHighlightRules = require("../lib/ace/mode/text_highlight_rules").TextHighlightRules;
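// Token types emitted by the rules below: anchor, backRef, char, charType,
// charProperty, charclass / charclass.start / charclass.end, alternation,
// quantifier, group.start / group.end, and plain "text" for anything unmatched.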
var r = new TextHighlightRules();
r.$rules = {
    start: [
        // ^, $ and the \b \B \A \Z \z \G anchor escapes
        {token: "anchor", regex: /[\^\$]|\\[bBAZzG]/, merge: false},
        // numbered (\1-\9) and named (\k<name>, \k'name') back-references,
        // with an optional Oniguruma nest level such as \k<name+1>
        {token: "backRef", regex: /\\([1-9]|k(<\w+\b([+-]\d+)?>|'\w+\b([+-]\d+)?'))/, merge: false},
        {include: "charTypes", merge: false},
        {token: "charclass", regex: /\[\^?/, push: "charclass", merge: false},
        {token: "alternation", regex: /\|/, merge: false},
        {include: "quantifiers", merge: false},
        {include: "groups", merge: false},
        {include: "xGroup", merge: true}
    ],
    charTypes: [
        // escape sequences: control chars, octal, and hex escapes; the x7...
        // alternative appears aimed at Oniguruma's wide \x{7HHHHHHH} form. todo \cx
        {token: "char", regex: /\\([tvnrbfae]|[0-8]{1,3}|x[\dA-Fa-f]{2}|x7[\dA-Fa-f]{7})/, merge: false},
        // . and the \w \W \s \S \d \D \h \H class shorthands
        {token: "charType", regex: /\.|\\[wWsSdDhH]/, merge: false},
        // unicode properties, e.g. \p{Alpha}
        {token: "charProperty", regex: /\\p{\w+}/, merge: false},
        // any other escaped character is a literal
        {token: "char", regex: /\\./, merge: false}
    ],
    quantifiers: [
        // ?, *, +, {n}, {n,}, {n,m}, {,m}, optionally lazy (?) or possessive (+)
        {token: "quantifier", regex: /([?*+]|{\d+\b,?\d*}|{,\d+})[?+]?/, merge: false}
    ],
    charclass: [
        {include: "charTypes", merge: false},
        // character classes may nest, so push/pop a state per bracket pair
        {token: "charclass.start", regex: /\[\^?/, push: "charclass", merge: false},
        {token: "charclass.end", regex: /\]/, next: "pop", merge: false}
    ],
    groups: [
        // "(" with an optional (?...) modifier, or ")"
        {token: "group", regex: /[(]([?](#|[imx\-]+:?|:|=|!|<=|<!|>|<\w+>|'\w+'|))?|[)]/,
            onMatch: function(val, state, stack) {
                // lazily initialize the capture-group counter on the tokenizer stack
                if (!stack.groupNumber)
                    stack.groupNumber = 1;
                var isStart = val !== ")";
                var t = {depth: 0, type: isStart ? "group.start" : "group.end", value: val};
                // the third character distinguishes (?:, (?=, (?<name>, (?'name', (?imx- ...
                t.groupType = val[2];
                if (val == "(") {
                    // plain capturing group: assign the next group number
                    t.number = stack.groupNumber++;
                    t.isGroup = true;
                } else if (t.groupType == "'" || (t.groupType == "<" && val.slice(-1) == ">")) {
                    // named group (?<name> or (?'name'
                    t.name = val.slice(2, -1);
                    t.isGroup = true;
                } else if (t.groupType == ":") {
                    t.isGroup = true;
                }
                // track the x (extended / free-spacing) flag: (?x) or (?x: turns it
                // on, (?-x) turns it off again
                if (t.groupType && val.indexOf("x") != -1) {
                    var minus = val.indexOf("-");
                    if (minus == -1 || minus > val.indexOf("x"))
                        stack.xGroup = t;
                    else
                        stack.xGroup = null;
                } else if (!isStart && stack.xGroup && stack.xGroup == stack[0]) {
                    // closing a scoped (?x: ...) group ends extended mode
                    if (stack.xGroup.value.slice(-1) == ":")
                        stack.xGroup = null;
                }
                if (isStart) {
                    if (stack.groupDepth) {
                        stack[0].hasChildren = true;
                    }
                    stack.groupDepth = (stack.groupDepth || 0) + 1;
                    stack.unshift(t);
                } else {
                    stack.groupDepth--;
                    // link the closing token to its matching opening token
                    t.start = stack.shift();
                    t.start.end = t;
                }
                return [t];
            }, merge: false
        }
    ],
    xGroup: [
        // in extended mode whitespace is insignificant: emit no token for it
        {token: "text", regex: /\s+/, onMatch: function(val, state, stack) {
            return stack.xGroup ? [] : "text";
        }, merge: true},
        // in extended mode "#" starts a comment that runs to the end of the line
        {token: "text", regex: /#/, onMatch: function(val, state, stack) {
            if (stack.xGroup) {
                this.next = "comment";
                stack.unshift(state); // remember the state to return to
                return [];
            }
            this.next = "";
            return "text";
        }, merge: true}
    ],
    comment: [{
        // consume the rest of the line, then return to the saved state
        regex: "[^\n\r]*|^", token: "", onMatch: function(val, state, stack) {
            this.next = stack.shift();
            return [];
        }
    }]
};
// resolve include/push/next directives into concrete states
r.normalizeRules();
var tmReTokenizer = new Tokenizer(r.getRules());
// Tokenize one line of regexp source into an array of {type, value, ...} tokens.
function tokenize(str) {
    return tmReTokenizer.getLineTokens(str).tokens;
}
// Reassemble a token array into the source text it came from.
function toStr(tokens) {
    return tokens.map(function(x) { return x.value; }).join("");
}
exports.tokenize = tokenize;
exports.toStr = toStr;
exports.tmReTokenizer = tmReTokenizer;
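
/***** usage sketch */
// A minimal, hedged example (assumes this file is run directly under Node,
// e.g. `node regexp_tokenizer.js`; the sample pattern is illustrative only).
if (require.main === module) {
    var sample = "(?<year>\\d{4})-\\d{2}";
    var tokens = tokenize(sample);
    tokens.forEach(function(t) {
        console.log(t.type + "\t" + JSON.stringify(t.value));
    });
    // every character lands in some token (unmatched ones become "text"),
    // so toStr reassembles the exact input
    console.log("round-trip:", toStr(tokens) === sample);
}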