"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.removeAccents =
    exports.calculateTextDocumentOffsets =
    exports.stringToRegExp =
    exports.extractText =
    exports.textOffset =
    exports.matchCase =
    exports.camelToSnake =
    exports.snakeToCamel =
    exports.lcFirst =
    exports.ucFirst =
    exports.isFirstCharacterLower =
    exports.isFirstCharacterUpper =
    exports.isLowerCase =
    exports.isUpperCase =
    exports.extractWordsFromCodeTextOffset =
    exports.extractWordsFromCode =
    exports.extractWordsFromTextOffset =
    exports.extractWordsFromText =
    exports.extractLinesOfText =
    exports.matchToTextOffset =
    exports.matchStringToTextOffset =
    exports.match =
    exports.splitCamelCaseWord =
    exports.splitCamelCaseWordWithOffset =
        void 0;
const cspell_util_bundle_1 = require("cspell-util-bundle");
const gensequence_1 = require("gensequence");
const search_1 = require("./search");
const util_1 = require("./util");

// Matches one line at a time, including its trailing line break (if any).
const regExLines = /.*(\r?\n|$)/g;
// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');
// An upper-case run followed by a lower-case suffix such as "s" or "ing" (e.g. `URLs`).
const regExUpperSOrIng = cspell_util_bundle_1.xregexp("(\\p{Lu}+\\\\?['’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})", 'g');
// Lower-case letter immediately followed by an upper-case letter (`fooBar`).
const regExSplitWords = cspell_util_bundle_1.xregexp('(\\p{Ll})(\\p{Lu})', 'g');
// Upper-case letter followed by an upper-case + lower-case pair (`HTMLParser`).
const regExSplitWords2 = cspell_util_bundle_1.xregexp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
// Runs of letters, optionally joined by an (optionally backslash-escaped) apostrophe.
const regExWords = cspell_util_bundle_1.xregexp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');
// Characters that match \p{L} but are blanked out before word extraction.
const regExIgnoreCharacters = cspell_util_bundle_1.xregexp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}|[\\u30A0-\\u30FF]|[\\p{Hangul}]', 'g');
const regExFirstUpper = cspell_util_bundle_1.xregexp('^\\p{Lu}\\p{Ll}+$');
const regExAllUpper = cspell_util_bundle_1.xregexp('^\\p{Lu}+$');
const regExAllLower = cspell_util_bundle_1.xregexp('^\\p{Ll}+$');
// Recognizes the textual form `/pattern/flags`.
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;
// Unicode combining marks.
const regExAccents = cspell_util_bundle_1.xregexp('\\p{M}', 'g');

/**
 * Split a camelCase word into its parts, giving each part an offset.
 *
 * The first part starts at `wo.offset`; every following part starts at the
 * previous part's offset plus the previous part's length.
 *
 * @param {{text: string, offset: number}} wo - the word and its offset.
 * @returns {Array<{text: string, offset: number}>} one entry per part.
 */
function splitCamelCaseWordWithOffset(wo) {
    const seed = { text: '', offset: wo.offset };
    // NOTE(review): scanMap comes from ./util; it is assumed to return a stateful
    // mapper that feeds each result back in as `last` - confirm against ./util.
    const toTextOffset = util_1.scanMap((last, text) => ({ text, offset: last.offset + last.text.length }), seed);
    return splitCamelCaseWord(wo.text).map(toTextOffset);
}
exports.splitCamelCaseWordWithOffset = splitCamelCaseWordWithOffset;

/**
 * Split camelCase words into an array of strings.
 *
 * Matches of `regExUpperSOrIng` are first rewritten so that only their first
 * character keeps its case (`URLs` becomes `Urls`), which stops the suffix
 * from being split off. A separator is then inserted at every lower-to-upper
 * boundary and before the last capital of an upper-case run, and the word is
 * split on that separator.
 *
 * @param {string} word - the word to split.
 * @returns {string[]} the parts of the word.
 */
function splitCamelCaseWord(word) {
    const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
    const separator = '_<^*_*^>_';
    const replacement = '$1' + separator + '$2';
    const pass1 = cspell_util_bundle_1.xregexp.replace(wPrime, regExSplitWords, replacement);
    const pass2 = cspell_util_bundle_1.xregexp.replace(pass1, regExSplitWords2, replacement);
    return cspell_util_bundle_1.xregexp.split(pass2, separator);
}
exports.splitCamelCaseWord = splitCamelCaseWord;

/**
 * This function lets you iterate over regular expression matches.
 *
 * @param {RegExp} reg - the expression to apply.
 * @param {string} text - the text to search.
 * @returns the sequence produced by gensequence's `sequenceFromRegExpMatch`.
 */
function match(reg, text) {
    return gensequence_1.sequenceFromRegExpMatch(reg, text);
}
exports.match = match;

/**
 * Match `reg` against a plain string, reporting matches relative to offset 0.
 *
 * @param {RegExp} reg - the expression to apply.
 * @param {string} text - the text to search.
 * @returns a sequence of `{ text, offset }` entries, one per match.
 */
function matchStringToTextOffset(reg, text) {
    return matchToTextOffset(reg, { text, offset: 0 });
}
exports.matchStringToTextOffset = matchStringToTextOffset;

/**
 * Match `reg` against `text.text` and shift each match index by `text.offset`.
 *
 * @param {RegExp} reg - the expression to apply.
 * @param {{text: string, offset: number}} text - the text and its base offset.
 * @returns a sequence of `{ text, offset }` entries, one per match.
 */
function matchToTextOffset(reg, text) {
    const shiftByBase = offsetMap(text.offset);
    return match(reg, text.text).map((m) => shiftByBase({ text: m[0], offset: m.index }));
}
exports.matchToTextOffset = matchToTextOffset;

/**
 * Break a string into lines using `regExLines`.
 *
 * @param {string} text - the text to split.
 * @returns a sequence of `{ text, offset }` entries, one per matched line.
 */
function extractLinesOfText(text) {
    return matchStringToTextOffset(regExLines, text);
}
exports.extractLinesOfText = extractLinesOfText;

/**
 * Extract out whole words from a string of text.
 *
 * @param {string} text - the text to scan.
 * @returns a sequence of `{ text, offset }` entries, one per word.
 */
function extractWordsFromText(text) {
    return extractWordsFromTextOffset(textOffset(text));
}
exports.extractWordsFromText = extractWordsFromText;

/**
 * Extract out whole words from a string of text.
 *
 * Works in two passes: candidate words are matched with `regExWords`, the
 * characters matched by `regExIgnoreCharacters` are replaced with spaces of
 * the same length (so offsets stay valid), and the result is matched again.
 *
 * @param {{text: string, offset: number}} text - the text and its base offset.
 * @returns a sequence of `{ text, offset }` entries, one per non-empty word.
 */
function extractWordsFromTextOffset(text) {
    // Each pass gets its own regex object, presumably so the two matchers do
    // not share `lastIndex` state - TODO confirm xregexp(regex) returns a copy.
    const reg = cspell_util_bundle_1.xregexp(regExWords);
    const reg2 = cspell_util_bundle_1.xregexp(regExWords);
    return (matchToTextOffset(reg, text)
        // remove characters that match against \p{L} but are not letters (Chinese characters are an example).
        .map(({ text, offset }) => ({
            text: cspell_util_bundle_1.xregexp.replace(text, regExIgnoreCharacters, (ignored) => ' '.repeat(ignored.length)),
            offset,
        }))
        .concatMap((wo) => matchToTextOffset(reg2, wo))
        .filter((wo) => !!wo.text));
}
exports.extractWordsFromTextOffset = extractWordsFromTextOffset;

/**
 * Extract words from source code, additionally splitting camelCase words.
 *
 * @param {string} text - the code to scan.
 * @returns a sequence of `{ text, offset }` entries, one per word part.
 */
function extractWordsFromCode(text) {
    return extractWordsFromCodeTextOffset(textOffset(text));
}
exports.extractWordsFromCode = extractWordsFromCode;

/**
 * Extract words from source code with a base offset, splitting camelCase words.
 *
 * @param {{text: string, offset: number}} textOffset - the code and its base offset.
 * @returns a sequence of `{ text, offset }` entries, one per word part.
 */
function extractWordsFromCodeTextOffset(textOffset) {
    return extractWordsFromTextOffset(textOffset).concatMap(splitCamelCaseWordWithOffset);
}
exports.extractWordsFromCodeTextOffset = extractWordsFromCodeTextOffset;

/**
 * @param {string} word
 * @returns {boolean} true when `word` is non-empty and made only of upper-case letters.
 */
function isUpperCase(word) {
    return !!word.match(regExAllUpper);
}
exports.isUpperCase = isUpperCase;

/**
 * @param {string} word
 * @returns {boolean} true when `word` is non-empty and made only of lower-case letters.
 */
function isLowerCase(word) {
    return !!word.match(regExAllLower);
}
exports.isLowerCase = isLowerCase;

/**
 * @param {string} word
 * @returns {boolean} true when the first UTF-16 code unit of `word` is an upper-case letter.
 */
function isFirstCharacterUpper(word) {
    return isUpperCase(word.slice(0, 1));
}
exports.isFirstCharacterUpper = isFirstCharacterUpper;

/**
 * @param {string} word
 * @returns {boolean} true when the first UTF-16 code unit of `word` is a lower-case letter.
 */
function isFirstCharacterLower(word) {
    return isLowerCase(word.slice(0, 1));
}
exports.isFirstCharacterLower = isFirstCharacterLower;

/**
 * Upper-case the first character of `word`, leaving the rest untouched.
 *
 * @param {string} word
 * @returns {string}
 */
function ucFirst(word) {
    return word.slice(0, 1).toUpperCase() + word.slice(1);
}
exports.ucFirst = ucFirst;

/**
 * Lower-case the first character of `word`, leaving the rest untouched.
 *
 * @param {string} word
 * @returns {string}
 */
function lcFirst(word) {
    return word.slice(0, 1).toLowerCase() + word.slice(1);
}
exports.lcFirst = lcFirst;

/**
 * Convert `snake_case` to `SnakeCase`: every `_`-separated segment gets its
 * first character upper-cased and the segments are joined without separator.
 *
 * @param {string} word
 * @returns {string}
 */
function snakeToCamel(word) {
    return word.split('_').map(ucFirst).join('');
}
exports.snakeToCamel = snakeToCamel;

/**
 * Convert a camelCase word to lower-case `snake_case`.
 *
 * @param {string} word
 * @returns {string}
 */
function camelToSnake(word) {
    return splitCamelCaseWord(word).join('_').toLowerCase();
}
exports.camelToSnake = camelToSnake;

/**
 * Apply the letter-case pattern of `example` to `word`.
 *
 * Checked in order:
 *  1. `Example` (one upper-case letter then lower-case letters): capitalize `word`, lower-case the rest.
 *  2. `example` (all lower-case): lower-case `word`.
 *  3. `EXAMPLE` (all upper-case): upper-case `word`.
 *  4. First character upper-case: upper-case only the first character of `word`.
 *  5. First character lower-case: lower-case only the first character of `word`.
 * Otherwise `word` is returned unchanged.
 *
 * @param {string} example - the word whose casing is copied.
 * @param {string} word - the word to re-case.
 * @returns {string}
 */
function matchCase(example, word) {
    if (example.match(regExFirstUpper)) {
        return word.slice(0, 1).toUpperCase() + word.slice(1).toLowerCase();
    }
    if (example.match(regExAllLower)) {
        return word.toLowerCase();
    }
    if (example.match(regExAllUpper)) {
        return word.toUpperCase();
    }
    if (isFirstCharacterUpper(example)) {
        return ucFirst(word);
    }
    if (isFirstCharacterLower(example)) {
        return lcFirst(word);
    }
    return word;
}
exports.matchCase = matchCase;

/**
 * Build a `{ text, offset }` pair.
 *
 * @param {string} text
 * @param {number} [offset=0]
 * @returns {{text: string, offset: number}}
 */
function textOffset(text, offset = 0) {
    return { text, offset };
}
exports.textOffset = textOffset;

/**
 * Slice out the text between two positions expressed in the same coordinate
 * space as `textOffset.offset`.
 *
 * Positions that fall before the start of the text are clamped to its start.
 *
 * @param {{text: string, offset: number}} textOffset - the text and its base offset.
 * @param {number} startPos - start position (inclusive).
 * @param {number} endPos - end position (exclusive).
 * @returns {string}
 */
function extractText(textOffset, startPos, endPos) {
    const { text, offset: orig } = textOffset;
    const a = Math.max(startPos - orig, 0);
    const b = Math.max(endPos - orig, 0);
    return text.slice(a, b);
}
exports.extractText = extractText;

/**
 * Create a mapper that returns a copy of its argument with `offset` shifted by `offset`.
 *
 * @param {number} offset - the amount to add.
 * @returns {(xo: {offset: number}) => {offset: number}}
 */
function offsetMap(offset) {
    return (xo) => ({ ...xo, offset: xo.offset + offset });
}

/**
 * Convert a string into a RegExp.
 *
 * A `RegExp` argument is returned as is. A string of the form `/pattern/flags`
 * is split into its pattern and flags; any other string is used whole as the
 * pattern with `defaultFlags`. `forceFlags` are always added, duplicate flags
 * are removed and characters other than `g`, `i`, `m`, `u`, `y` are dropped.
 *
 * @param {string | RegExp} pattern - the pattern to convert.
 * @param {string} [defaultFlags='gim'] - flags used when `pattern` carries none.
 * @param {string} [forceFlags='g'] - flags that are always applied.
 * @returns {RegExp | undefined} the expression, or `undefined` when the
 *   pattern is empty or cannot be compiled.
 */
function stringToRegExp(pattern, defaultFlags = 'gim', forceFlags = 'g') {
    if (pattern instanceof RegExp) {
        return pattern;
    }
    try {
        const [, pat, flag] = pattern.match(regExMatchRegExParts) || ['', pattern, defaultFlags];
        // Make sure the flags are unique.
        const flags = [...new Set(forceFlags + flag)].join('').replace(/[^gimuy]/g, '');
        if (pat) {
            return new RegExp(pat, flags);
        }
    }
    catch (e) {
        // Deliberately best-effort: an invalid pattern yields `undefined` instead of throwing.
    }
    return undefined;
}
exports.stringToRegExp = stringToRegExp;

/**
 * Annotate word offsets with their row and column in a document.
 *
 * @param {string} uri - identifier of the document; copied onto every result.
 * @param {string} doc - full text of the document; copied onto every result.
 * @param {Array<{offset: number}>} wordOffsets - entries to annotate.
 * @returns {Array} a copy of each entry extended with `row`, `col`, `doc` and `uri`.
 */
function calculateTextDocumentOffsets(uri, doc, wordOffsets) {
    // Line boundaries: a sentinel before the first character, the index of
    // every newline, and the end of the document.
    const lines = [-1, ...match(/\n/g, doc).map((a) => a.index), doc.length];
    function findRowCol(offset) {
        // NOTE(review): binarySearch comes from ./search; presumably it returns the
        // index of the first boundary that is >= offset - confirm against ./search.
        const row = search_1.binarySearch(lines, offset);
        const col = offset - lines[Math.max(0, row - 1)];
        return [row, col];
    }
    return wordOffsets.map((wo) => {
        const [row, col] = findRowCol(wo.offset);
        return { ...wo, row, col, doc, uri };
    });
}
exports.calculateTextDocumentOffsets = calculateTextDocumentOffsets;

/**
 * Strip combining marks from `text` after NFKD normalization.
 *
 * @param {string} text
 * @returns {string}
 */
function removeAccents(text) {
    return text.normalize('NFKD').replace(regExAccents, '');
}
exports.removeAccents = removeAccents;
//# sourceMappingURL=text.js.map