403 lines
16 KiB
JavaScript
403 lines
16 KiB
JavaScript
(function (Prism) {
|
|
|
|
var comment_inside = {
|
|
'function': /\b(?:BUGS?|FIX(?:MES?)?|NOTES?|TODOS?|XX+|HACKS?|WARN(?:ING)?|\?{2,}|!{2,})\b/
|
|
};
|
|
var string_inside = {
|
|
'number': /\\[^\s']|%\w/
|
|
};
|
|
|
|
var factor = {
|
|
'comment': [
|
|
{
|
|
// ! single-line exclamation point comments with whitespace after/around the !
|
|
pattern: /(^|\s)(?:! .*|!$)/,
|
|
lookbehind: true,
|
|
inside: comment_inside
|
|
},
|
|
|
|
/* from basis/multiline: */
|
|
{
|
|
// /* comment */, /* comment*/
|
|
pattern: /(^|\s)\/\*\s[\s\S]*?\*\/(?=\s|$)/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
inside: comment_inside
|
|
},
|
|
{
|
|
// ![[ comment ]] , ![===[ comment]===]
|
|
pattern: /(^|\s)!\[(={0,6})\[\s[\s\S]*?\]\2\](?=\s|$)/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
inside: comment_inside
|
|
}
|
|
],
|
|
|
|
'number': [
|
|
{
|
|
// basic base 10 integers 9, -9
|
|
pattern: /(^|\s)[+-]?\d+(?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
// base prefix integers 0b010 0o70 0xad 0d10 0XAD -0xa9
|
|
pattern: /(^|\s)[+-]?0(?:b[01]+|o[0-7]+|d\d+|x[\dA-F]+)(?=\s|$)/i,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
// fractional ratios 1/5 -1/5 and the literal float approximations 1/5. -1/5.
|
|
pattern: /(^|\s)[+-]?\d+\/\d+\.?(?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
// positive mixed numbers 23+1/5 +23+1/5
|
|
pattern: /(^|\s)\+?\d+\+\d+\/\d+(?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
// negative mixed numbers -23-1/5
|
|
pattern: /(^|\s)-\d+-\d+\/\d+(?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
// basic decimal floats -0.01 0. .0 .1 -.1 -1. -12.13 +12.13
|
|
// and scientific notation with base 10 exponents 3e4 3e-4 .3e-4
|
|
pattern: /(^|\s)[+-]?(?:\d*\.\d+|\d+\.\d*|\d+)(?:e[+-]?\d+)?(?=\s|$)/i,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
// NAN literal syntax NAN: 80000deadbeef, NAN: a
|
|
pattern: /(^|\s)NAN:\s+[\da-fA-F]+(?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
{
|
|
/*
|
|
base prefix floats 0x1.0p3 (8.0) 0b1.010p2 (5.0) 0x1.p1 0b1.11111111p11111...
|
|
"The normalized hex form ±0x1.MMMMMMMMMMMMM[pP]±EEEE allows any floating-point number to be specified precisely.
|
|
The values of MMMMMMMMMMMMM and EEEE map directly to the mantissa and exponent fields of the binary IEEE 754 representation."
|
|
<https://docs.factorcode.org/content/article-syntax-floats.html>
|
|
*/
|
|
pattern: /(^|\s)[+-]?0(?:b1\.[01]*|o1\.[0-7]*|d1\.\d*|x1\.[\dA-F]*)p\d+(?=\s|$)/i,
|
|
lookbehind: true
|
|
}
|
|
],
|
|
|
|
// R/ regexp?\/\\/
|
|
'regexp': {
|
|
pattern: /(^|\s)R\/\s(?:\\\S|[^\\/])*\/(?:[idmsr]*|[idmsr]+-[idmsr]+)(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'number',
|
|
inside: {
|
|
'variable': /\\\S/,
|
|
'keyword': /[+?*\[\]^$(){}.|]/,
|
|
'operator': {
|
|
pattern: /(\/)[idmsr]+(?:-[idmsr]+)?/,
|
|
lookbehind: true
|
|
}
|
|
}
|
|
},
|
|
|
|
'boolean': {
|
|
pattern: /(^|\s)[tf](?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
|
|
// SBUF" asd", URL" ://...", P" /etc/"
|
|
'custom-string': {
|
|
pattern: /(^|\s)[A-Z0-9\-]+"\s(?:\\\S|[^"\\])*"/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
alias: 'string',
|
|
inside: {
|
|
'number': /\\\S|%\w|\//
|
|
}
|
|
},
|
|
|
|
'multiline-string': [
|
|
{
|
|
// STRING: name \n content \n ; -> CONSTANT: name "content" (symbol)
|
|
pattern: /(^|\s)STRING:\s+\S+(?:\n|\r\n).*(?:\n|\r\n)\s*;(?=\s|$)/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
alias: 'string',
|
|
inside: {
|
|
'number': string_inside.number,
|
|
// trailing semicolon on its own line
|
|
'semicolon-or-setlocal': {
|
|
pattern: /([\r\n][ \t]*);(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'function'
|
|
}
|
|
}
|
|
},
|
|
{
|
|
// HEREDOC: marker \n content \n marker ; -> "content" (immediate)
|
|
pattern: /(^|\s)HEREDOC:\s+\S+(?:\n|\r\n).*(?:\n|\r\n)\s*\S+(?=\s|$)/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
alias: 'string',
|
|
inside: string_inside
|
|
},
|
|
{
|
|
// [[ string ]], [==[ string]==]
|
|
pattern: /(^|\s)\[(={0,6})\[\s[\s\S]*?\]\2\](?=\s|$)/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
alias: 'string',
|
|
inside: string_inside
|
|
}
|
|
],
|
|
|
|
'special-using': {
|
|
pattern: /(^|\s)USING:(?:\s\S+)*(?=\s+;(?:\s|$))/,
|
|
lookbehind: true,
|
|
alias: 'function',
|
|
inside: {
|
|
// this is essentially a regex for vocab names, which i don't want to specify
|
|
// but the USING: gets picked up as a vocab name
|
|
'string': {
|
|
pattern: /(\s)[^:\s]+/,
|
|
lookbehind: true
|
|
}
|
|
}
|
|
},
|
|
|
|
/* this description of stack effect literal syntax is not complete and not as specific as theoretically possible
|
|
trying to do better is more work and regex-computation-time than it's worth though.
|
|
- we'd like to have the "delimiter" parts of the stack effect [ (, --, and ) ] be a different (less-important or comment-like) colour to the stack effect contents
|
|
- we'd like if nested stack effects were treated as such rather than just appearing flat (with `inside`)
|
|
- we'd like if the following variable name conventions were recognised specifically:
|
|
special row variables = ..a b..
|
|
type and stack effect annotations end with a colon = ( quot: ( a: ( -- ) -- b ) -- x ), ( x: number -- )
|
|
word throws unconditional error = *
|
|
any other word-like variable name = a ? q' etc
|
|
|
|
https://docs.factorcode.org/content/article-effects.html
|
|
|
|
these are pretty complicated to highlight properly without a real parser, and therefore out of scope
|
|
the old pattern, which may be later useful, was: (^|\s)(?:call|execute|eval)?\((?:\s+[^"\r\n\t ]\S*)*?\s+--(?:\s+[^"\n\t ]\S*)*?\s+\)(?=\s|$)
|
|
*/
|
|
|
|
// current solution is not great
|
|
'stack-effect-delimiter': [
|
|
{
|
|
// opening parenthesis
|
|
pattern: /(^|\s)(?:call|eval|execute)?\((?=\s)/,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
{
|
|
// middle --
|
|
pattern: /(\s)--(?=\s)/,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
{
|
|
// closing parenthesis
|
|
pattern: /(\s)\)(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
}
|
|
],
|
|
|
|
'combinators': {
|
|
pattern: null,
|
|
lookbehind: true,
|
|
alias: 'keyword'
|
|
},
|
|
|
|
'kernel-builtin': {
|
|
pattern: null,
|
|
lookbehind: true,
|
|
alias: 'variable'
|
|
},
|
|
|
|
'sequences-builtin': {
|
|
pattern: null,
|
|
lookbehind: true,
|
|
alias: 'variable'
|
|
},
|
|
|
|
'math-builtin': {
|
|
pattern: null,
|
|
lookbehind: true,
|
|
alias: 'variable'
|
|
},
|
|
|
|
'constructor-word': {
|
|
// <array> but not <=>
|
|
pattern: /(^|\s)<(?!=+>|-+>)\S+>(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'keyword'
|
|
},
|
|
|
|
'other-builtin-syntax': {
|
|
pattern: null,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
|
|
/*
|
|
full list of supported word naming conventions: (the convention appears outside of the [brackets])
|
|
set-[x]
|
|
change-[x]
|
|
with-[x]
|
|
new-[x]
|
|
>[string]
|
|
[base]>
|
|
[string]>[number]
|
|
+[symbol]+
|
|
[boolean-word]?
|
|
?[of]
|
|
[slot-reader]>>
|
|
>>[slot-setter]
|
|
[slot-writer]<<
|
|
([implementation-detail])
|
|
[mutater]!
|
|
[variant]*
|
|
[prettyprint].
|
|
$[help-markup]
|
|
|
|
<constructors>, SYNTAX:, etc are supported by their own patterns.
|
|
|
|
`with` and `new` from `kernel` are their own builtins.
|
|
|
|
see <https://docs.factorcode.org/content/article-conventions.html>
|
|
*/
|
|
'conventionally-named-word': {
|
|
pattern: /(^|\s)(?!")(?:(?:change|new|set|with)-\S+|\$\S+|>[^>\s]+|[^:>\s]+>|[^>\s]+>[^>\s]+|\+[^+\s]+\+|[^?\s]+\?|\?[^?\s]+|[^>\s]+>>|>>[^>\s]+|[^<\s]+<<|\([^()\s]+\)|[^!\s]+!|[^*\s]\S*\*|[^.\s]\S*\.)(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'keyword'
|
|
},
|
|
|
|
'colon-syntax': {
|
|
pattern: /(^|\s)(?:[A-Z0-9\-]+#?)?:{1,2}\s+(?:;\S+|(?!;)\S+)(?=\s|$)/,
|
|
lookbehind: true,
|
|
greedy: true,
|
|
alias: 'function'
|
|
},
|
|
|
|
'semicolon-or-setlocal': {
|
|
pattern: /(\s)(?:;|:>)(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'function'
|
|
},
|
|
|
|
// do not highlight leading } or trailing X{ at the begin/end of the file as it's invalid syntax
|
|
'curly-brace-literal-delimiter': [
|
|
{
|
|
// opening
|
|
pattern: /(^|\s)[a-z]*\{(?=\s)/i,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
{
|
|
// closing
|
|
pattern: /(\s)\}(?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
|
|
],
|
|
|
|
// do not highlight leading ] or trailing [ at the begin/end of the file as it's invalid syntax
|
|
'quotation-delimiter': [
|
|
{
|
|
// opening
|
|
pattern: /(^|\s)\[(?=\s)/,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
{
|
|
// closing
|
|
pattern: /(\s)\](?=\s|$)/,
|
|
lookbehind: true,
|
|
alias: 'operator'
|
|
},
|
|
],
|
|
|
|
'normal-word': {
|
|
pattern: /(^|\s)[^"\s]\S*(?=\s|$)/,
|
|
lookbehind: true
|
|
},
|
|
|
|
/*
|
|
basic first-class string "a"
|
|
with escaped double-quote "a\""
|
|
escaped backslash "\\"
|
|
and general escapes since Factor has so many "\N"
|
|
|
|
syntax that works in the reference implementation that isn't fully
|
|
supported because it's an implementation detail:
|
|
"string 1""string 2" -> 2 strings (works anyway)
|
|
"string"5 -> string, 5
|
|
"string"[ ] -> string, quotation
|
|
{ "a"} -> array<string>
|
|
|
|
the rest of those examples all properly recognise the string, but not
|
|
the other object (number, quotation, etc)
|
|
this is fine for a regex-only implementation.
|
|
*/
|
|
'string': {
|
|
pattern: /"(?:\\\S|[^"\\])*"/,
|
|
greedy: true,
|
|
inside: string_inside
|
|
}
|
|
};
|
|
|
|
var escape = function (str) {
|
|
return (str + '').replace(/([.?*+\^$\[\]\\(){}|\-])/g, '\\$1');
|
|
};
|
|
|
|
var arrToWordsRegExp = function (arr) {
|
|
return new RegExp(
|
|
'(^|\\s)(?:' + arr.map(escape).join('|') + ')(?=\\s|$)'
|
|
);
|
|
};
|
|
|
|
var builtins = {
|
|
'kernel-builtin': [
|
|
'or', '2nipd', '4drop', 'tuck', 'wrapper', 'nip', 'wrapper?', 'callstack>array', 'die', 'dupd', 'callstack', 'callstack?', '3dup', 'hashcode', 'pick', '4nip', 'build', '>boolean', 'nipd', 'clone', '5nip', 'eq?', '?', '=', 'swapd', '2over', 'clear', '2dup', 'get-retainstack', 'not', 'tuple?', 'dup', '3nipd', 'call', '-rotd', 'object', 'drop', 'assert=', 'assert?', '-rot', 'execute', 'boa', 'get-callstack', 'curried?', '3drop', 'pickd', 'overd', 'over', 'roll', '3nip', 'swap', 'and', '2nip', 'rotd', 'throw', '(clone)', 'hashcode*', 'spin', 'reach', '4dup', 'equal?', 'get-datastack', 'assert', '2drop', '<wrapper>', 'boolean?', 'identity-hashcode', 'identity-tuple?', 'null', 'composed?', 'new', '5drop', 'rot', '-roll', 'xor', 'identity-tuple', 'boolean'
|
|
],
|
|
'other-builtin-syntax': [
|
|
// syntax
|
|
'=======', 'recursive', 'flushable', '>>', '<<<<<<', 'M\\', 'B', 'PRIVATE>', '\\', '======', 'final', 'inline', 'delimiter', 'deprecated', '<PRIVATE', '>>>>>>', '<<<<<<<', 'parse-complex', 'malformed-complex', 'read-only', '>>>>>>>', 'call-next-method', '<<', 'foldable',
|
|
// literals
|
|
'$', '$[', '${'
|
|
],
|
|
'sequences-builtin': [
|
|
'member-eq?', 'mismatch', 'append', 'assert-sequence=', 'longer', 'repetition', 'clone-like', '3sequence', 'assert-sequence?', 'last-index-from', 'reversed', 'index-from', 'cut*', 'pad-tail', 'join-as', 'remove-eq!', 'concat-as', 'but-last', 'snip', 'nths', 'nth', 'sequence', 'longest', 'slice?', '<slice>', 'remove-nth', 'tail-slice', 'empty?', 'tail*', 'member?', 'virtual-sequence?', 'set-length', 'drop-prefix', 'iota', 'unclip', 'bounds-error?', 'unclip-last-slice', 'non-negative-integer-expected', 'non-negative-integer-expected?', 'midpoint@', 'longer?', '?set-nth', '?first', 'rest-slice', 'prepend-as', 'prepend', 'fourth', 'sift', 'subseq-start', 'new-sequence', '?last', 'like', 'first4', '1sequence', 'reverse', 'slice', 'virtual@', 'repetition?', 'set-last', 'index', '4sequence', 'max-length', 'set-second', 'immutable-sequence', 'first2', 'first3', 'supremum', 'unclip-slice', 'suffix!', 'insert-nth', 'tail', '3append', 'short', 'suffix', 'concat', 'flip', 'immutable?', 'reverse!', '2sequence', 'sum', 'delete-all', 'indices', 'snip-slice', '<iota>', 'check-slice', 'sequence?', 'head', 'append-as', 'halves', 'sequence=', 'collapse-slice', '?second', 'slice-error?', 'product', 'bounds-check?', 'bounds-check', 'immutable', 'virtual-exemplar', 'harvest', 'remove', 'pad-head', 'last', 'set-fourth', 'cartesian-product', 'remove-eq', 'shorten', 'shorter', 'reversed?', 'shorter?', 'shortest', 'head-slice', 'pop*', 'tail-slice*', 'but-last-slice', 'iota?', 'append!', 'cut-slice', 'new-resizable', 'head-slice*', 'sequence-hashcode', 'pop', 'set-nth', '?nth', 'second', 'join', 'immutable-sequence?', '<reversed>', '3append-as', 'virtual-sequence', 'subseq?', 'remove-nth!', 'length', 'last-index', 'lengthen', 'assert-sequence', 'copy', 'move', 'third', 'first', 'tail?', 'set-first', 'prefix', 'bounds-error', '<repetition>', 'exchange', 'surround', 'cut', 'min-length', 'set-third', 'push-all', 'head?', 'subseq-start-from', 'delete-slice', 'rest', 'sum-lengths', 'head*', 'infimum', 'remove!', 'glue', 'slice-error', 'subseq', 'push', 'replace-slice', 'subseq-as', 'unclip-last'
|
|
],
|
|
'math-builtin': [
|
|
'number=', 'next-power-of-2', '?1+', 'fp-special?', 'imaginary-part', 'float>bits', 'number?', 'fp-infinity?', 'bignum?', 'fp-snan?', 'denominator', 'gcd', '*', '+', 'fp-bitwise=', '-', 'u>=', '/', '>=', 'bitand', 'power-of-2?', 'log2-expects-positive', 'neg?', '<', 'log2', '>', 'integer?', 'number', 'bits>double', '2/', 'zero?', 'bits>float', 'float?', 'shift', 'ratio?', 'rect>', 'even?', 'ratio', 'fp-sign', 'bitnot', '>fixnum', 'complex?', '/i', 'integer>fixnum', '/f', 'sgn', '>bignum', 'next-float', 'u<', 'u>', 'mod', 'recip', 'rational', '>float', '2^', 'integer', 'fixnum?', 'neg', 'fixnum', 'sq', 'bignum', '>rect', 'bit?', 'fp-qnan?', 'simple-gcd', 'complex', '<fp-nan>', 'real', '>fraction', 'double>bits', 'bitor', 'rem', 'fp-nan-payload', 'real-part', 'log2-expects-positive?', 'prev-float', 'align', 'unordered?', 'float', 'fp-nan?', 'abs', 'bitxor', 'integer>fixnum-strict', 'u<=', 'odd?', '<=', '/mod', '>integer', 'real?', 'rational?', 'numerator'
|
|
]
|
|
// that's all for now
|
|
};
|
|
|
|
Object.keys(builtins).forEach(function (k) {
|
|
factor[k].pattern = arrToWordsRegExp(builtins[k]);
|
|
});
|
|
|
|
var combinators = [
|
|
// kernel
|
|
'2bi', 'while', '2tri', 'bi*', '4dip', 'both?', 'same?', 'tri@', 'curry', 'prepose', '3bi', '?if', 'tri*', '2keep', '3keep', 'curried', '2keepd', 'when', '2bi*', '2tri*', '4keep', 'bi@', 'keepdd', 'do', 'unless*', 'tri-curry', 'if*', 'loop', 'bi-curry*', 'when*', '2bi@', '2tri@', 'with', '2with', 'either?', 'bi', 'until', '3dip', '3curry', 'tri-curry*', 'tri-curry@', 'bi-curry', 'keepd', 'compose', '2dip', 'if', '3tri', 'unless', 'tuple', 'keep', '2curry', 'tri', 'most', 'while*', 'dip', 'composed', 'bi-curry@',
|
|
// sequences
|
|
'find-last-from', 'trim-head-slice', 'map-as', 'each-from', 'none?', 'trim-tail', 'partition', 'if-empty', 'accumulate*', 'reject!', 'find-from', 'accumulate-as', 'collector-for-as', 'reject', 'map', 'map-sum', 'accumulate!', '2each-from', 'follow', 'supremum-by', 'map!', 'unless-empty', 'collector', 'padding', 'reduce-index', 'replicate-as', 'infimum-by', 'trim-tail-slice', 'count', 'find-index', 'filter', 'accumulate*!', 'reject-as', 'map-integers', 'map-find', 'reduce', 'selector', 'interleave', '2map', 'filter-as', 'binary-reduce', 'map-index-as', 'find', 'produce', 'filter!', 'replicate', 'cartesian-map', 'cartesian-each', 'find-index-from', 'map-find-last', '3map-as', '3map', 'find-last', 'selector-as', '2map-as', '2map-reduce', 'accumulate', 'each', 'each-index', 'accumulate*-as', 'when-empty', 'all?', 'collector-as', 'push-either', 'new-like', 'collector-for', '2selector', 'push-if', '2all?', 'map-reduce', '3each', 'any?', 'trim-slice', '2reduce', 'change-nth', 'produce-as', '2each', 'trim', 'trim-head', 'cartesian-find', 'map-index',
|
|
// math
|
|
'if-zero', 'each-integer', 'unless-zero', '(find-integer)', 'when-zero', 'find-last-integer', '(all-integers?)', 'times', '(each-integer)', 'find-integer', 'all-integers?',
|
|
// math.combinators
|
|
'unless-negative', 'if-positive', 'when-positive', 'when-negative', 'unless-positive', 'if-negative',
|
|
// combinators
|
|
'case', '2cleave', 'cond>quot', 'case>quot', '3cleave', 'wrong-values', 'to-fixed-point', 'alist>quot', 'cond', 'cleave', 'call-effect', 'recursive-hashcode', 'spread', 'deep-spread>quot',
|
|
// combinators.short-circuit
|
|
'2||', '0||', 'n||', '0&&', '2&&', '3||', '1||', '1&&', 'n&&', '3&&',
|
|
// combinators.smart
|
|
'smart-unless*', 'keep-inputs', 'reduce-outputs', 'smart-when*', 'cleave>array', 'smart-with', 'smart-apply', 'smart-if', 'inputs/outputs', 'output>sequence-n', 'map-outputs', 'map-reduce-outputs', 'dropping', 'output>array', 'smart-map-reduce', 'smart-2map-reduce', 'output>array-n', 'nullary', 'input<sequence', 'append-outputs', 'drop-inputs', 'inputs', 'smart-2reduce', 'drop-outputs', 'smart-reduce', 'preserving', 'smart-when', 'outputs', 'append-outputs-as', 'smart-unless', 'smart-if*', 'sum-outputs', 'input<sequence-unsafe', 'output>sequence',
|
|
// tafn
|
|
];
|
|
|
|
factor.combinators.pattern = arrToWordsRegExp(combinators);
|
|
|
|
Prism.languages.factor = factor;
|
|
|
|
}(Prism));
|