big-moving.ru/api/soft/monako/esm/vs/basic-languages/postiats/postiats.js

613 lines
18 KiB
JavaScript
Executable File

/*---------------------------------------------------------------------------------------------
* Copyright (c) Artyom Shalkhakov. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*
* Based on the ATS/Postiats lexer by Hongwei Xi.
*--------------------------------------------------------------------------------------------*/
'use strict';
export var conf = {
comments: {
lineComment: '//',
blockComment: ['(*', '*)'],
},
brackets: [['{', '}'], ['[', ']'], ['(', ')'], ['<', '>']],
autoClosingPairs: [
{ open: '"', close: '"', notIn: ['string', 'comment'] },
{ open: '{', close: '}', notIn: ['string', 'comment'] },
{ open: '[', close: ']', notIn: ['string', 'comment'] },
{ open: '(', close: ')', notIn: ['string', 'comment'] },
]
};
export var language = {
tokenPostfix: '.pats',
// TODO: staload and dynload are followed by a special kind of string literals
// with {$IDENTIFER} variables, and it also may make sense to highlight
// the punctuation (. and / and \) differently.
// Set defaultToken to invalid to see what you do not tokenize yet
defaultToken: 'invalid',
// keyword reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing_token.dats
keywords: [
//
"abstype",
"abst0ype",
"absprop",
"absview",
"absvtype",
"absviewtype",
"absvt0ype",
"absviewt0ype",
//
"as",
//
"and",
//
"assume",
//
"begin",
//
/*
"case", // CASE
*/
//
"classdec",
//
"datasort",
//
"datatype",
"dataprop",
"dataview",
"datavtype",
"dataviewtype",
//
"do",
//
"end",
//
"extern",
"extype",
"extvar",
//
"exception",
//
"fn",
"fnx",
"fun",
//
"prfn",
"prfun",
//
"praxi",
"castfn",
//
"if",
"then",
"else",
//
"ifcase",
//
"in",
//
"infix",
"infixl",
"infixr",
"prefix",
"postfix",
//
"implmnt",
"implement",
//
"primplmnt",
"primplement",
//
"import",
//
/*
"lam", // LAM
"llam", // LLAM
"fix", // FIX
*/
//
"let",
//
"local",
//
"macdef",
"macrodef",
//
"nonfix",
//
"symelim",
"symintr",
"overload",
//
"of",
"op",
//
"rec",
//
"sif",
"scase",
//
"sortdef",
/*
// HX: [sta] is now deprecated
*/
"sta",
"stacst",
"stadef",
"static",
/*
"stavar", // T_STAVAR
*/
//
"staload",
"dynload",
//
"try",
//
"tkindef",
//
/*
"type", // TYPE
*/
"typedef",
"propdef",
"viewdef",
"vtypedef",
"viewtypedef",
//
/*
"val", // VAL
*/
"prval",
//
"var",
"prvar",
//
"when",
"where",
//
/*
"for", // T_FOR
"while", // T_WHILE
*/
//
"with",
//
"withtype",
"withprop",
"withview",
"withvtype",
"withviewtype",
],
keywords_dlr: [
"$delay",
"$ldelay",
//
"$arrpsz",
"$arrptrsize",
//
"$d2ctype",
//
"$effmask",
"$effmask_ntm",
"$effmask_exn",
"$effmask_ref",
"$effmask_wrt",
"$effmask_all",
//
"$extern",
"$extkind",
"$extype",
"$extype_struct",
//
"$extval",
"$extfcall",
"$extmcall",
//
"$literal",
//
"$myfilename",
"$mylocation",
"$myfunction",
//
"$lst",
"$lst_t",
"$lst_vt",
"$list",
"$list_t",
"$list_vt",
//
"$rec",
"$rec_t",
"$rec_vt",
"$record",
"$record_t",
"$record_vt",
//
"$tup",
"$tup_t",
"$tup_vt",
"$tuple",
"$tuple_t",
"$tuple_vt",
//
"$break",
"$continue",
//
"$raise",
//
"$showtype",
//
"$vcopyenv_v",
"$vcopyenv_vt",
//
"$tempenver",
//
"$solver_assert",
"$solver_verify",
],
keywords_srp: [
//
"#if",
"#ifdef",
"#ifndef",
//
"#then",
//
"#elif",
"#elifdef",
"#elifndef",
//
"#else",
"#endif",
//
"#error",
//
"#prerr",
"#print",
//
"#assert",
//
"#undef",
"#define",
//
"#include",
"#require",
//
"#pragma",
"#codegen2",
"#codegen3",
],
irregular_keyword_list: [
"val+",
"val-",
"val",
"case+",
"case-",
"case",
"addr@",
"addr",
"fold@",
"free@",
"fix@",
"fix",
"lam@",
"lam",
"llam@",
"llam",
"viewt@ype+",
"viewt@ype-",
"viewt@ype",
"viewtype+",
"viewtype-",
"viewtype",
"view+",
"view-",
"view@",
"view",
"type+",
"type-",
"type",
"vtype+",
"vtype-",
"vtype",
"vt@ype+",
"vt@ype-",
"vt@ype",
"viewt@ype+",
"viewt@ype-",
"viewt@ype",
"viewtype+",
"viewtype-",
"viewtype",
"prop+",
"prop-",
"prop",
"type+",
"type-",
"type",
"t@ype",
"t@ype+",
"t@ype-",
"abst@ype",
"abstype",
"absviewt@ype",
"absvt@ype",
"for*",
"for",
"while*",
"while"
],
keywords_types: [
'bool',
'double',
'byte',
'int',
'short',
'char',
'void',
'unit',
'long',
'float',
'string',
'strptr'
],
// TODO: reference for this?
keywords_effects: [
"0",
"fun",
"clo",
"prf",
"funclo",
"cloptr",
"cloref",
"ref",
"ntm",
"1" // all effects
],
operators: [
"@",
"!",
"|",
"`",
":",
"$",
".",
"=",
"#",
"~",
//
"..",
"...",
//
"=>",
// "=<", // T_EQLT
"=<>",
"=/=>",
"=>>",
"=/=>>",
//
"<",
">",
//
"><",
//
".<",
">.",
//
".<>.",
//
"->",
//"-<", // T_MINUSLT
"-<>",
],
brackets: [
{ open: ',(', close: ')', token: 'delimiter.parenthesis' },
{ open: '`(', close: ')', token: 'delimiter.parenthesis' },
{ open: '%(', close: ')', token: 'delimiter.parenthesis' },
{ open: '\'(', close: ')', token: 'delimiter.parenthesis' },
{ open: '\'{', close: '}', token: 'delimiter.parenthesis' },
{ open: '@(', close: ')', token: 'delimiter.parenthesis' },
{ open: '@{', close: '}', token: 'delimiter.brace' },
{ open: '@[', close: ']', token: 'delimiter.square' },
{ open: '#[', close: ']', token: 'delimiter.square' },
{ open: '{', close: '}', token: 'delimiter.curly' },
{ open: '[', close: ']', token: 'delimiter.square' },
{ open: '(', close: ')', token: 'delimiter.parenthesis' },
{ open: '<', close: '>', token: 'delimiter.angle' }
],
// we include these common regular expressions
symbols: /[=><!~?:&|+\-*\/\^%]+/,
IDENTFST: /[a-zA-Z_]/,
IDENTRST: /[a-zA-Z0-9_'$]/,
symbolic: /[%&+-./:=@~`^|*!$#?<>]/,
digit: /[0-9]/,
digitseq0: /@digit*/,
xdigit: /[0-9A-Za-z]/,
xdigitseq0: /@xdigit*/,
INTSP: /[lLuU]/,
FLOATSP: /[fFlL]/,
fexponent: /[eE][+-]?[0-9]+/,
fexponent_bin: /[pP][+-]?[0-9]+/,
deciexp: /\.[0-9]*@fexponent?/,
hexiexp: /\.[0-9a-zA-Z]*@fexponent_bin?/,
irregular_keywords: /val[+-]?|case[+-]?|addr\@?|fold\@|free\@|fix\@?|lam\@?|llam\@?|prop[+-]?|type[+-]?|view[+-@]?|viewt@?ype[+-]?|t@?ype[+-]?|v(iew)?t@?ype[+-]?|abst@?ype|absv(iew)?t@?ype|for\*?|while\*?/,
ESCHAR: /[ntvbrfa\\\?'"\(\[\{]/,
start: 'root',
// The main tokenizer for ATS/Postiats
// reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing.dats
tokenizer: {
root: [
// lexing_blankseq0
{ regex: /[ \t\r\n]+/, action: { token: '' } },
// NOTE: (*) is an invalid ML-like comment!
{ regex: /\(\*\)/, action: { token: 'invalid' } },
{ regex: /\(\*/, action: { token: 'comment', next: 'lexing_COMMENT_block_ml' } },
{ regex: /\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
{ regex: /\)/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
{ regex: /\[/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ },
{ regex: /\]/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ },
{ regex: /\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
{ regex: /\}/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
// lexing_COMMA
{ regex: /,\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
{ regex: /,/, action: { token: 'delimiter.comma' } },
{ regex: /;/, action: { token: 'delimiter.semicolon' } },
// lexing_AT
{ regex: /@\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ },
{ regex: /@\[/, action: '@brackets' /* { token: 'delimiter.bracket' }*/ },
{ regex: /@\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
// lexing_COLON
{ regex: /:</, action: { token: 'keyword', next: '@lexing_EFFECT_commaseq0' } },
/*
lexing_DOT:
. // SYMBOLIC => lexing_IDENT_sym
. FLOATDOT => lexing_FLOAT_deciexp
. DIGIT => T_DOTINT
*/
{ regex: /\.@symbolic+/, action: { token: 'identifier.sym' } },
// FLOATDOT case
{ regex: /\.@digit*@fexponent@FLOATSP*/, action: { token: 'number.float' } },
{ regex: /\.@digit+/, action: { token: 'number.float' } },
// lexing_DOLLAR:
// '$' IDENTFST IDENTRST* => lexing_IDENT_dlr, _ => lexing_IDENT_sym
{
regex: /\$@IDENTFST@IDENTRST*/,
action: {
cases: {
'@keywords_dlr': { token: 'keyword.dlr' },
'@default': { token: 'namespace' },
}
}
},
// lexing_SHARP:
// '#' IDENTFST IDENTRST* => lexing_ident_srp, _ => lexing_IDENT_sym
{
regex: /\#@IDENTFST@IDENTRST*/,
action: {
cases: {
'@keywords_srp': { token: 'keyword.srp' },
'@default': { token: 'identifier' },
}
}
},
// lexing_PERCENT:
{ regex: /%\(/, action: { token: 'delimiter.parenthesis' } },
{ regex: /^%{(#|\^|\$)?/, action: { token: 'keyword', next: '@lexing_EXTCODE', nextEmbedded: 'text/javascript' } },
{ regex: /^%}/, action: { token: 'keyword' } },
// lexing_QUOTE
{ regex: /'\(/, action: { token: 'delimiter.parenthesis' } },
{ regex: /'\[/, action: { token: 'delimiter.bracket' } },
{ regex: /'\{/, action: { token: 'delimiter.brace' } },
[/(')(\\@ESCHAR|\\[xX]@xdigit+|\\@digit+)(')/, ['string', 'string.escape', 'string']],
[/'[^\\']'/, 'string'],
// lexing_DQUOTE
[/"/, 'string.quote', '@lexing_DQUOTE'],
// lexing_BQUOTE
{ regex: /`\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ },
// TODO: otherwise, try lexing_IDENT_sym
{ regex: /\\/, action: { token: 'punctuation' } },
// lexing_IDENT_alp:
// NOTE: (?!regex) is syntax for "not-followed-by" regex
// to resolve ambiguity such as foreach$fwork being incorrectly lexed as [for] [each$fwork]!
{ regex: /@irregular_keywords(?!@IDENTRST)/, action: { token: 'keyword' } },
{
regex: /@IDENTFST@IDENTRST*[<!\[]?/,
action: {
cases: {
// TODO: dynload and staload should be specially parsed
// dynload whitespace+ "special_string"
// this special string is really:
// '/' '\\' '.' => punctuation
// ({\$)([a-zA-Z_][a-zA-Z_0-9]*)(}) => punctuation,keyword,punctuation
// [^"] => identifier/literal
'@keywords': { token: 'keyword' },
'@keywords_types': { token: 'type' },
'@default': { token: 'identifier' }
}
}
},
// lexing_IDENT_sym:
{ regex: /\/\/\/\//, action: { token: 'comment', next: '@lexing_COMMENT_rest' } },
{ regex: /\/\/.*$/, action: { token: 'comment' } },
{ regex: /\/\*/, action: { token: 'comment', next: '@lexing_COMMENT_block_c' } },
// AS-20160627: specifically for effect annotations
{ regex: /-<|=</, action: { token: 'keyword', next: '@lexing_EFFECT_commaseq0' } },
{
regex: /@symbolic+/,
action: {
cases: {
'@operators': 'keyword',
'@default': 'operator'
}
}
},
// lexing_ZERO:
// FIXME: this one is quite messy/unfinished yet
// TODO: lexing_INT_hex
// - testing_hexiexp => lexing_FLOAT_hexiexp
// - testing_fexponent_bin => lexing_FLOAT_hexiexp
// - testing_intspseq0 => T_INT_hex
// lexing_INT_hex:
{ regex: /0[xX]@xdigit+(@hexiexp|@fexponent_bin)@FLOATSP*/, action: { token: 'number.float' } },
{ regex: /0[xX]@xdigit+@INTSP*/, action: { token: 'number.hex' } },
{ regex: /0[0-7]+(?![0-9])@INTSP*/, action: { token: 'number.octal' } },
//{regex: /0/, action: { token: 'number' } }, // INTZERO
// lexing_INT_dec:
// - testing_deciexp => lexing_FLOAT_deciexp
// - testing_fexponent => lexing_FLOAT_deciexp
// - otherwise => intspseq0 ([0-9]*[lLuU]?)
{ regex: /@digit+(@fexponent|@deciexp)@FLOATSP*/, action: { token: 'number.float' } },
{ regex: /@digit@digitseq0@INTSP*/, action: { token: 'number.decimal' } },
// DIGIT, if followed by digitseq0, is lexing_INT_dec
{ regex: /@digit+@INTSP*/, action: { token: 'number' } },
],
lexing_COMMENT_block_ml: [
[/[^\(\*]+/, 'comment'],
[/\(\*/, 'comment', '@push'],
[/\(\*/, 'comment.invalid'],
[/\*\)/, 'comment', '@pop'],
[/\*/, 'comment']
],
lexing_COMMENT_block_c: [
[/[^\/*]+/, 'comment'],
// [/\/\*/, 'comment', '@push' ], // nested C-style block comments not allowed
// [/\/\*/, 'comment.invalid' ], // NOTE: this breaks block comments in the shape of /* //*/
[/\*\//, 'comment', '@pop'],
[/[\/*]/, 'comment']
],
lexing_COMMENT_rest: [
[/$/, 'comment', '@pop'],
[/.*/, 'comment']
],
// NOTE: added by AS, specifically for highlighting
lexing_EFFECT_commaseq0: [
{
regex: /@IDENTFST@IDENTRST+|@digit+/,
action: {
cases: {
'@keywords_effects': { token: 'type.effect' },
'@default': { token: 'identifier' }
}
}
},
{ regex: /,/, action: { token: 'punctuation' } },
{ regex: />/, action: { token: '@rematch', next: '@pop' } },
],
lexing_EXTCODE: [
{ regex: /^%}/, action: { token: '@rematch', next: '@pop', nextEmbedded: '@pop' } },
{ regex: /[^%]+/, action: '' },
],
lexing_DQUOTE: [
{ regex: /"/, action: { token: 'string.quote', next: '@pop' } },
// AS-20160628: additional hi-lighting for variables in staload/dynload strings
{ regex: /(\{\$)(@IDENTFST@IDENTRST*)(\})/, action: [{ token: 'string.escape' }, { token: 'identifier' }, { token: 'string.escape' }] },
{ regex: /\\$/, action: { token: 'string.escape' } },
{ regex: /\\(@ESCHAR|[xX]@xdigit+|@digit+)/, action: { token: 'string.escape' } },
{ regex: /[^\\"]+/, action: { token: 'string' } }
],
},
};