/*--------------------------------------------------------------------------------------------- * Copyright (c) Artyom Shalkhakov. All rights reserved. * Licensed under the MIT License. See License.txt in the project root for license information. * * Based on the ATS/Postiats lexer by Hongwei Xi. *--------------------------------------------------------------------------------------------*/ 'use strict'; export var conf = { comments: { lineComment: '//', blockComment: ['(*', '*)'], }, brackets: [['{', '}'], ['[', ']'], ['(', ')'], ['<', '>']], autoClosingPairs: [ { open: '"', close: '"', notIn: ['string', 'comment'] }, { open: '{', close: '}', notIn: ['string', 'comment'] }, { open: '[', close: ']', notIn: ['string', 'comment'] }, { open: '(', close: ')', notIn: ['string', 'comment'] }, ] }; export var language = { tokenPostfix: '.pats', // TODO: staload and dynload are followed by a special kind of string literals // with {$IDENTIFER} variables, and it also may make sense to highlight // the punctuation (. and / and \) differently. // Set defaultToken to invalid to see what you do not tokenize yet defaultToken: 'invalid', // keyword reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing_token.dats keywords: [ // "abstype", "abst0ype", "absprop", "absview", "absvtype", "absviewtype", "absvt0ype", "absviewt0ype", // "as", // "and", // "assume", // "begin", // /* "case", // CASE */ // "classdec", // "datasort", // "datatype", "dataprop", "dataview", "datavtype", "dataviewtype", // "do", // "end", // "extern", "extype", "extvar", // "exception", // "fn", "fnx", "fun", // "prfn", "prfun", // "praxi", "castfn", // "if", "then", "else", // "ifcase", // "in", // "infix", "infixl", "infixr", "prefix", "postfix", // "implmnt", "implement", // "primplmnt", "primplement", // "import", // /* "lam", // LAM "llam", // LLAM "fix", // FIX */ // "let", // "local", // "macdef", "macrodef", // "nonfix", // "symelim", "symintr", "overload", // "of", "op", // "rec", // "sif", "scase", // "sortdef", /* // HX: [sta] is now deprecated */ "sta", "stacst", "stadef", "static", /* "stavar", // T_STAVAR */ // "staload", "dynload", // "try", // "tkindef", // /* "type", // TYPE */ "typedef", "propdef", "viewdef", "vtypedef", "viewtypedef", // /* "val", // VAL */ "prval", // "var", "prvar", // "when", "where", // /* "for", // T_FOR "while", // T_WHILE */ // "with", // "withtype", "withprop", "withview", "withvtype", "withviewtype", ], keywords_dlr: [ "$delay", "$ldelay", // "$arrpsz", "$arrptrsize", // "$d2ctype", // "$effmask", "$effmask_ntm", "$effmask_exn", "$effmask_ref", "$effmask_wrt", "$effmask_all", // "$extern", "$extkind", "$extype", "$extype_struct", // "$extval", "$extfcall", "$extmcall", // "$literal", // "$myfilename", "$mylocation", "$myfunction", // "$lst", "$lst_t", "$lst_vt", "$list", "$list_t", "$list_vt", // "$rec", "$rec_t", "$rec_vt", "$record", "$record_t", "$record_vt", // "$tup", "$tup_t", "$tup_vt", "$tuple", "$tuple_t", "$tuple_vt", // "$break", "$continue", // "$raise", // "$showtype", // "$vcopyenv_v", "$vcopyenv_vt", // "$tempenver", // "$solver_assert", "$solver_verify", ], keywords_srp: [ // "#if", "#ifdef", "#ifndef", // "#then", // "#elif", "#elifdef", "#elifndef", // "#else", "#endif", // "#error", // "#prerr", "#print", // "#assert", // "#undef", "#define", // "#include", "#require", // "#pragma", "#codegen2", "#codegen3", ], irregular_keyword_list: [ "val+", "val-", "val", "case+", "case-", "case", "addr@", "addr", "fold@", "free@", "fix@", "fix", "lam@", "lam", "llam@", "llam", "viewt@ype+", "viewt@ype-", "viewt@ype", "viewtype+", "viewtype-", "viewtype", "view+", "view-", "view@", "view", "type+", "type-", "type", "vtype+", "vtype-", "vtype", "vt@ype+", "vt@ype-", "vt@ype", "viewt@ype+", "viewt@ype-", "viewt@ype", "viewtype+", "viewtype-", "viewtype", "prop+", "prop-", "prop", "type+", "type-", "type", "t@ype", "t@ype+", "t@ype-", "abst@ype", "abstype", "absviewt@ype", "absvt@ype", "for*", "for", "while*", "while" ], keywords_types: [ 'bool', 'double', 'byte', 'int', 'short', 'char', 'void', 'unit', 'long', 'float', 'string', 'strptr' ], // TODO: reference for this? keywords_effects: [ "0", "fun", "clo", "prf", "funclo", "cloptr", "cloref", "ref", "ntm", "1" // all effects ], operators: [ "@", "!", "|", "`", ":", "$", ".", "=", "#", "~", // "..", "...", // "=>", // "=<", // T_EQLT "=<>", "=/=>", "=>>", "=/=>>", // "<", ">", // "><", // ".<", ">.", // ".<>.", // "->", //"-<", // T_MINUSLT "-<>", ], brackets: [ { open: ',(', close: ')', token: 'delimiter.parenthesis' }, { open: '`(', close: ')', token: 'delimiter.parenthesis' }, { open: '%(', close: ')', token: 'delimiter.parenthesis' }, { open: '\'(', close: ')', token: 'delimiter.parenthesis' }, { open: '\'{', close: '}', token: 'delimiter.parenthesis' }, { open: '@(', close: ')', token: 'delimiter.parenthesis' }, { open: '@{', close: '}', token: 'delimiter.brace' }, { open: '@[', close: ']', token: 'delimiter.square' }, { open: '#[', close: ']', token: 'delimiter.square' }, { open: '{', close: '}', token: 'delimiter.curly' }, { open: '[', close: ']', token: 'delimiter.square' }, { open: '(', close: ')', token: 'delimiter.parenthesis' }, { open: '<', close: '>', token: 'delimiter.angle' } ], // we include these common regular expressions symbols: /[=>]/, digit: /[0-9]/, digitseq0: /@digit*/, xdigit: /[0-9A-Za-z]/, xdigitseq0: /@xdigit*/, INTSP: /[lLuU]/, FLOATSP: /[fFlL]/, fexponent: /[eE][+-]?[0-9]+/, fexponent_bin: /[pP][+-]?[0-9]+/, deciexp: /\.[0-9]*@fexponent?/, hexiexp: /\.[0-9a-zA-Z]*@fexponent_bin?/, irregular_keywords: /val[+-]?|case[+-]?|addr\@?|fold\@|free\@|fix\@?|lam\@?|llam\@?|prop[+-]?|type[+-]?|view[+-@]?|viewt@?ype[+-]?|t@?ype[+-]?|v(iew)?t@?ype[+-]?|abst@?ype|absv(iew)?t@?ype|for\*?|while\*?/, ESCHAR: /[ntvbrfa\\\?'"\(\[\{]/, start: 'root', // The main tokenizer for ATS/Postiats // reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing.dats tokenizer: { root: [ // lexing_blankseq0 { regex: /[ \t\r\n]+/, action: { token: '' } }, // NOTE: (*) is an invalid ML-like comment! { regex: /\(\*\)/, action: { token: 'invalid' } }, { regex: /\(\*/, action: { token: 'comment', next: 'lexing_COMMENT_block_ml' } }, { regex: /\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ }, { regex: /\)/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ }, { regex: /\[/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ }, { regex: /\]/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ }, { regex: /\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ }, { regex: /\}/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ }, // lexing_COMMA { regex: /,\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ }, { regex: /,/, action: { token: 'delimiter.comma' } }, { regex: /;/, action: { token: 'delimiter.semicolon' } }, // lexing_AT { regex: /@\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ }, { regex: /@\[/, action: '@brackets' /* { token: 'delimiter.bracket' }*/ }, { regex: /@\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ }, // lexing_COLON { regex: /: lexing_IDENT_sym . FLOATDOT => lexing_FLOAT_deciexp . DIGIT => T_DOTINT */ { regex: /\.@symbolic+/, action: { token: 'identifier.sym' } }, // FLOATDOT case { regex: /\.@digit*@fexponent@FLOATSP*/, action: { token: 'number.float' } }, { regex: /\.@digit+/, action: { token: 'number.float' } }, // lexing_DOLLAR: // '$' IDENTFST IDENTRST* => lexing_IDENT_dlr, _ => lexing_IDENT_sym { regex: /\$@IDENTFST@IDENTRST*/, action: { cases: { '@keywords_dlr': { token: 'keyword.dlr' }, '@default': { token: 'namespace' }, } } }, // lexing_SHARP: // '#' IDENTFST IDENTRST* => lexing_ident_srp, _ => lexing_IDENT_sym { regex: /\#@IDENTFST@IDENTRST*/, action: { cases: { '@keywords_srp': { token: 'keyword.srp' }, '@default': { token: 'identifier' }, } } }, // lexing_PERCENT: { regex: /%\(/, action: { token: 'delimiter.parenthesis' } }, { regex: /^%{(#|\^|\$)?/, action: { token: 'keyword', next: '@lexing_EXTCODE', nextEmbedded: 'text/javascript' } }, { regex: /^%}/, action: { token: 'keyword' } }, // lexing_QUOTE { regex: /'\(/, action: { token: 'delimiter.parenthesis' } }, { regex: /'\[/, action: { token: 'delimiter.bracket' } }, { regex: /'\{/, action: { token: 'delimiter.brace' } }, [/(')(\\@ESCHAR|\\[xX]@xdigit+|\\@digit+)(')/, ['string', 'string.escape', 'string']], [/'[^\\']'/, 'string'], // lexing_DQUOTE [/"/, 'string.quote', '@lexing_DQUOTE'], // lexing_BQUOTE { regex: /`\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ }, // TODO: otherwise, try lexing_IDENT_sym { regex: /\\/, action: { token: 'punctuation' } }, // lexing_IDENT_alp: // NOTE: (?!regex) is syntax for "not-followed-by" regex // to resolve ambiguity such as foreach$fwork being incorrectly lexed as [for] [each$fwork]! { regex: /@irregular_keywords(?!@IDENTRST)/, action: { token: 'keyword' } }, { regex: /@IDENTFST@IDENTRST*[ punctuation // ({\$)([a-zA-Z_][a-zA-Z_0-9]*)(}) => punctuation,keyword,punctuation // [^"] => identifier/literal '@keywords': { token: 'keyword' }, '@keywords_types': { token: 'type' }, '@default': { token: 'identifier' } } } }, // lexing_IDENT_sym: { regex: /\/\/\/\//, action: { token: 'comment', next: '@lexing_COMMENT_rest' } }, { regex: /\/\/.*$/, action: { token: 'comment' } }, { regex: /\/\*/, action: { token: 'comment', next: '@lexing_COMMENT_block_c' } }, // AS-20160627: specifically for effect annotations { regex: /-<|= lexing_FLOAT_hexiexp // - testing_fexponent_bin => lexing_FLOAT_hexiexp // - testing_intspseq0 => T_INT_hex // lexing_INT_hex: { regex: /0[xX]@xdigit+(@hexiexp|@fexponent_bin)@FLOATSP*/, action: { token: 'number.float' } }, { regex: /0[xX]@xdigit+@INTSP*/, action: { token: 'number.hex' } }, { regex: /0[0-7]+(?![0-9])@INTSP*/, action: { token: 'number.octal' } }, //{regex: /0/, action: { token: 'number' } }, // INTZERO // lexing_INT_dec: // - testing_deciexp => lexing_FLOAT_deciexp // - testing_fexponent => lexing_FLOAT_deciexp // - otherwise => intspseq0 ([0-9]*[lLuU]?) { regex: /@digit+(@fexponent|@deciexp)@FLOATSP*/, action: { token: 'number.float' } }, { regex: /@digit@digitseq0@INTSP*/, action: { token: 'number.decimal' } }, // DIGIT, if followed by digitseq0, is lexing_INT_dec { regex: /@digit+@INTSP*/, action: { token: 'number' } }, ], lexing_COMMENT_block_ml: [ [/[^\(\*]+/, 'comment'], [/\(\*/, 'comment', '@push'], [/\(\*/, 'comment.invalid'], [/\*\)/, 'comment', '@pop'], [/\*/, 'comment'] ], lexing_COMMENT_block_c: [ [/[^\/*]+/, 'comment'], // [/\/\*/, 'comment', '@push' ], // nested C-style block comments not allowed // [/\/\*/, 'comment.invalid' ], // NOTE: this breaks block comments in the shape of /* //*/ [/\*\//, 'comment', '@pop'], [/[\/*]/, 'comment'] ], lexing_COMMENT_rest: [ [/$/, 'comment', '@pop'], [/.*/, 'comment'] ], // NOTE: added by AS, specifically for highlighting lexing_EFFECT_commaseq0: [ { regex: /@IDENTFST@IDENTRST+|@digit+/, action: { cases: { '@keywords_effects': { token: 'type.effect' }, '@default': { token: 'identifier' } } } }, { regex: /,/, action: { token: 'punctuation' } }, { regex: />/, action: { token: '@rematch', next: '@pop' } }, ], lexing_EXTCODE: [ { regex: /^%}/, action: { token: '@rematch', next: '@pop', nextEmbedded: '@pop' } }, { regex: /[^%]+/, action: '' }, ], lexing_DQUOTE: [ { regex: /"/, action: { token: 'string.quote', next: '@pop' } }, // AS-20160628: additional hi-lighting for variables in staload/dynload strings { regex: /(\{\$)(@IDENTFST@IDENTRST*)(\})/, action: [{ token: 'string.escape' }, { token: 'identifier' }, { token: 'string.escape' }] }, { regex: /\\$/, action: { token: 'string.escape' } }, { regex: /\\(@ESCHAR|[xX]@xdigit+|@digit+)/, action: { token: 'string.escape' } }, { regex: /[^\\"]+/, action: { token: 'string' } } ], }, };