virt2/api/soft/CodeMirror/text/src/char.ts

let extendingChars = /[\u0300-\u036f\u0483-\u0489\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u064b-\u065e\u0670\u06d6-\u06dc\u06de-\u06e4\u06e7\u06e8\u06ea-\u06ed\u0711\u0730-\u074a\u0b82\u0bbe\u0bc0\u0bcd\u0bd7\u0d3e\u0d41-\u0d44\u0d4d\u0d57\u0d62\u0d63\u0e31\u0e34-\u0e3a\u0e47-\u0e4e\u0eb1\u0eb4-\u0eb9\u0ebb\u0ebc\u0ec8-\u0ecd\u180b-\u180d\u18a9\u200c\u200d]/
try { extendingChars = new RegExp("\\p{Grapheme_Extend}", "u") } catch (_) {}

/// Test whether a given code unit (as in, the thing that `charCodeAt`
/// returns) extends the character before it.
export function isExtendingChar(code: number): boolean {
  return code >= 768 && (code >= 0xdc00 && code < 0xe000 || extendingChars.test(String.fromCharCode(code)))
}

const nonASCIISingleCaseWordChar = /[\u00df\u0587\u0590-\u05f4\u0600-\u06ff\u3040-\u309f\u30a0-\u30ff\u3400-\u4db5\u4e00-\u9fcc\uac00-\ud7af]/

let wordChar: RegExp | null
try { wordChar = new RegExp("[\\p{Alphabetic}_]", "u") } catch (_) {}

// FIXME this doesn't work for astral chars yet (need different calling convention)

function isWordCharBasic(ch: string): boolean {
  if (wordChar) return wordChar.test(ch)
  return /\w/.test(ch) || ch > "\x80" &&
    (ch.toUpperCase() != ch.toLowerCase() || nonASCIISingleCaseWordChar.test(ch))
}

/// Test whether the given character is a word character.
export function isWordChar(ch: string, wordChars?: RegExp): boolean {
  if (!wordChars) return isWordCharBasic(ch)
  if (wordChars.source.indexOf("\\w") > -1 && isWordCharBasic(ch)) return true
  return wordChars.test(ch)
}

/// This is used to group characters into three categories—word
/// characters, whitespace, and anything else. It is used, by default,
/// to do things like selecting by word.
export enum CharType { Word, Space, Other }

/// Determine the character type for a given character.
export function charType(ch: string, wordChars?: RegExp): CharType {
  // FIXME make this configurable in a better way
  return /\s/.test(ch) ? CharType.Space : isWordChar(ch, wordChars) ? CharType.Word : CharType.Other
}

/// Find the code point at the given position in a string (as in the
/// [`codePointAt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt)
/// string method).
export function codePointAt(str: string, pos: number) {
  let code0 = str.charCodeAt(pos)
  if (code0 < 0xd800 || code0 > 0xdbff || pos + 1 == str.length) return code0
  let code1 = str.charCodeAt(pos + 1)
  if (code1 < 0xdc00 || code1 > 0xdfff) return code0
  return ((code0 - 0xd800) << 10) + (code1 - 0xdc00) + 0x10000
}

/// Given a Unicode codepoint, return the JavaScript string that
/// respresents it (as in
/// [`String.fromCodePoint`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint).
export function fromCodePoint(code: number) {
  if (code <= 0xffff) return String.fromCharCode(code)
  code -= 0x10000
  return String.fromCharCode((code >> 10) + 0xd800, (code & 1023) + 0xdc00)
}

/// The first character that takes up two positions in a JavaScript
/// string. It is often useful to compare with this after calling
/// `codePointAt`, to figure out whether your character takes up 1 or
/// 2 index positions.
export const minPairCodePoint = 0x10000