You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
191 lines
7.9 KiB
191 lines
7.9 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.removeAccents = exports.calculateTextDocumentOffsets = exports.stringToRegExp = exports.extractText = exports.textOffset = exports.matchCase = exports.camelToSnake = exports.snakeToCamel = exports.lcFirst = exports.ucFirst = exports.isFirstCharacterLower = exports.isFirstCharacterUpper = exports.isLowerCase = exports.isUpperCase = exports.extractWordsFromCodeTextOffset = exports.extractWordsFromCode = exports.extractWordsFromTextOffset = exports.extractWordsFromText = exports.extractLinesOfText = exports.matchToTextOffset = exports.matchStringToTextOffset = exports.match = exports.splitCamelCaseWord = exports.splitCamelCaseWordWithOffset = void 0; |
|
const cspell_util_bundle_1 = require("cspell-util-bundle"); |
|
const gensequence_1 = require("gensequence"); |
|
const search_1 = require("./search"); |
|
const util_1 = require("./util"); |
|
/**
 * Build a regular expression through the bundled XRegExp factory.
 * All arguments are forwarded unchanged.
 */
const unicodeRegExp = (...args) => cspell_util_bundle_1.xregexp(...args);

// One line of text including its terminator (`\n` or `\r\n`), or the trailing unterminated remainder.
const regExLines = /.*(\r?\n|$)/g;

// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');

// A run of uppercase letters, an optional backslash and apostrophe, then one of the listed
// lowercase suffixes (s, ing, ies, ...), not followed by another lowercase letter.
const regExUpperSOrIng = unicodeRegExp("(\\p{Lu}+\\\\?['’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})", 'g');

// A lowercase letter immediately followed by an uppercase letter.
const regExSplitWords = unicodeRegExp('(\\p{Ll})(\\p{Lu})', 'g');

// An uppercase letter followed by an uppercase + lowercase pair.
const regExSplitWords2 = unicodeRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');

// Two or more letters, optionally joined by an (optionally backslash-escaped) apostrophe, or a single letter.
const regExWords = unicodeRegExp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');

// Characters from the Hiragana, Han, Katakana and Hangul scripts, plus the U+30A0-U+30FF range.
const regExIgnoreCharacters = unicodeRegExp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}|[\\u30A0-\\u30FF]|[\\p{Hangul}]', 'g');

// Exactly one uppercase letter followed by one or more lowercase letters.
const regExFirstUpper = unicodeRegExp('^\\p{Lu}\\p{Ll}+$');

// One or more uppercase letters and nothing else.
const regExAllUpper = unicodeRegExp('^\\p{Lu}+$');

// One or more lowercase letters and nothing else.
const regExAllLower = unicodeRegExp('^\\p{Ll}+$');

// A string written as `/pattern/flags`; captures the pattern and the (g, i, m, u, y) flags.
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;

// Any character in the Unicode Mark (`\p{M}`) category.
const regExAccents = unicodeRegExp('\\p{M}', 'g');
|
/**
 * Split a camelCase word that carries an offset into parts that each carry
 * their own offset.
 *
 * @param {{ text: string, offset: number }} wo - the word and where it starts.
 * @returns the parts from `splitCamelCaseWord`, mapped through `util_1.scanMap`.
 */
function splitCamelCaseWordWithOffset(wo) {
    const parts = splitCamelCaseWord(wo.text);
    // Starting value handed to scanMap: an empty text positioned at the word's own offset.
    const seed = { text: '', offset: wo.offset };
    // Each part starts where the previous one ended.
    // NOTE(review): assumes scanMap feeds the previous result back in as `previous` - confirm in ./util.
    const placeAfter = (previous, text) => ({
        text,
        offset: previous.offset + previous.text.length,
    });
    return parts.map(util_1.scanMap(placeAfter, seed));
}
|
exports.splitCamelCaseWordWithOffset = splitCamelCaseWordWithOffset; |
|
/**
 * Split camelCase words into an array of strings.
 *
 * Steps:
 *  1. Text matching `regExUpperSOrIng` (e.g. `URLs`) keeps its first character and has
 *     the rest lower-cased, so the suffix is not split off later.
 *  2. A private separator is inserted wherever `regExSplitWords` matches.
 *  3. The same separator is inserted wherever `regExSplitWords2` matches.
 *  4. The text is split on that separator.
 *
 * @param {string} word - the word to split.
 * @returns the pieces of the word.
 */
function splitCamelCaseWord(word) {
    const xre = cspell_util_bundle_1.xregexp;
    const boundary = '_<^*_*^>_';
    const withBoundary = `$1${boundary}$2`;
    const normalized = word.replace(regExUpperSOrIng, (run) => run[0] + run.slice(1).toLowerCase());
    const afterFirstPass = xre.replace(normalized, regExSplitWords, withBoundary);
    const afterSecondPass = xre.replace(afterFirstPass, regExSplitWords2, withBoundary);
    return xre.split(afterSecondPass, boundary);
}
|
exports.splitCamelCaseWord = splitCamelCaseWord; |
|
/**
 * Iterate over the matches of a regular expression in a text.
 *
 * @param {RegExp} reg - the expression to apply.
 * @param {string} text - the text to search.
 * @returns whatever `gensequence`'s `sequenceFromRegExpMatch` produces for the pair.
 */
function match(reg, text) {
    const matches = gensequence_1.sequenceFromRegExpMatch(reg, text);
    return matches;
}
|
exports.match = match; |
|
/**
 * Match a regular expression against a plain string, treating the string as
 * text that starts at offset 0.
 *
 * @param {RegExp} reg - the expression to apply.
 * @param {string} text - the text to search.
 * @returns the result of `matchToTextOffset` for the wrapped text.
 */
function matchStringToTextOffset(reg, text) {
    const wholeText = { text, offset: 0 };
    return matchToTextOffset(reg, wholeText);
}
|
exports.matchStringToTextOffset = matchStringToTextOffset; |
|
/**
 * Match a regular expression against text that carries an offset and report
 * each match as `{ text, offset }`, with the offset shifted by the text's own offset.
 *
 * @param {RegExp} reg - the expression to apply.
 * @param {{ text: string, offset: number }} text - the text and its offset.
 * @returns the mapped matches, in the container type returned by `match`.
 */
function matchToTextOffset(reg, text) {
    const { offset: baseOffset, text: content } = text;
    const shift = offsetMap(baseOffset);
    const toTextOffset = (m) => shift({ text: m[0], offset: m.index });
    return match(reg, content).map(toTextOffset);
}
|
exports.matchToTextOffset = matchToTextOffset; |
|
/**
 * Break a text into its lines by applying `regExLines` to it.
 *
 * @param {string} text - the text to split.
 * @returns the result of `matchStringToTextOffset` for the line pattern.
 */
function extractLinesOfText(text) {
    const lines = matchStringToTextOffset(regExLines, text);
    return lines;
}
|
exports.extractLinesOfText = extractLinesOfText; |
|
/**
 * Extract whole words from a string of text.
 *
 * The string is wrapped with `textOffset` (offset defaults to 0) and handed
 * to `extractWordsFromTextOffset`.
 *
 * @param {string} text - the text to scan.
 * @returns the result of `extractWordsFromTextOffset`.
 */
function extractWordsFromText(text) {
    const source = textOffset(text);
    return extractWordsFromTextOffset(source);
}
|
exports.extractWordsFromText = extractWordsFromText; |
|
/**
 * Extract whole words from text that carries an offset.
 *
 * Pipeline:
 *  1. find candidate words with a copy of `regExWords`;
 *  2. replace every character matched by `regExIgnoreCharacters` with a space
 *     (these match `\p{L}` but are not treated as letters here; Chinese characters are an example),
 *     keeping the text length, and therefore the offsets, unchanged;
 *  3. run the word pattern again over each cleaned candidate;
 *  4. drop entries whose text is empty.
 *
 * @param {{ text: string, offset: number }} text - the text and its offset.
 * @returns the sequence of `{ text, offset }` words.
 */
function extractWordsFromTextOffset(text) {
    // NOTE(review): two separate instances are built from regExWords, presumably so the
    // two passes do not share matcher state - confirm.
    const wordPattern = cspell_util_bundle_1.xregexp(regExWords);
    const cleanedWordPattern = cspell_util_bundle_1.xregexp(regExWords);
    const blankOut = (ignored) => ' '.repeat(ignored.length);
    const maskIgnoredCharacters = ({ text, offset }) => ({
        text: cspell_util_bundle_1.xregexp.replace(text, regExIgnoreCharacters, blankOut),
        offset,
    });
    const rematchWords = (wo) => matchToTextOffset(cleanedWordPattern, wo);
    const hasText = (wo) => !!wo.text;
    const candidates = matchToTextOffset(wordPattern, text);
    return candidates.map(maskIgnoredCharacters).concatMap(rematchWords).filter(hasText);
}
|
exports.extractWordsFromTextOffset = extractWordsFromTextOffset; |
|
/**
 * Extract words from a string of source code.
 *
 * The string is wrapped with `textOffset` (offset defaults to 0) and handed
 * to `extractWordsFromCodeTextOffset`.
 *
 * @param {string} text - the code to scan.
 * @returns the result of `extractWordsFromCodeTextOffset`.
 */
function extractWordsFromCode(text) {
    const source = textOffset(text);
    return extractWordsFromCodeTextOffset(source);
}
|
exports.extractWordsFromCode = extractWordsFromCode; |
|
/**
 * Extract words from code that carries an offset: the words found by
 * `extractWordsFromTextOffset` are each expanded with `splitCamelCaseWordWithOffset`.
 *
 * @param {{ text: string, offset: number }} textOffset - the code and its offset.
 * @returns the concatenated camelCase parts of every word.
 */
function extractWordsFromCodeTextOffset(textOffset) {
    const words = extractWordsFromTextOffset(textOffset);
    return words.concatMap(splitCamelCaseWordWithOffset);
}
|
exports.extractWordsFromCodeTextOffset = extractWordsFromCodeTextOffset; |
|
/**
 * Tell whether a word matches `regExAllUpper`.
 *
 * @param {string} word - the word to test.
 * @returns {boolean} true when the pattern matches.
 */
function isUpperCase(word) {
    const found = word.match(regExAllUpper);
    return Boolean(found);
}
|
exports.isUpperCase = isUpperCase; |
|
/**
 * Tell whether a word matches `regExAllLower`.
 *
 * @param {string} word - the word to test.
 * @returns {boolean} true when the pattern matches.
 */
function isLowerCase(word) {
    const found = word.match(regExAllLower);
    return Boolean(found);
}
|
exports.isLowerCase = isLowerCase; |
|
/**
 * Tell whether the first character of a word passes `isUpperCase`.
 *
 * @param {string} word - the word to inspect.
 * @returns {boolean} the result of `isUpperCase` on the first character.
 */
function isFirstCharacterUpper(word) {
    const firstCharacter = word.slice(0, 1);
    return isUpperCase(firstCharacter);
}
|
exports.isFirstCharacterUpper = isFirstCharacterUpper; |
|
/**
 * Tell whether the first character of a word passes `isLowerCase`.
 *
 * @param {string} word - the word to inspect.
 * @returns {boolean} the result of `isLowerCase` on the first character.
 */
function isFirstCharacterLower(word) {
    const firstCharacter = word.slice(0, 1);
    return isLowerCase(firstCharacter);
}
|
exports.isFirstCharacterLower = isFirstCharacterLower; |
|
/**
 * Upper-case the first character of a word and leave the rest untouched.
 *
 * @param {string} word - the word to convert.
 * @returns {string} the word with its first character upper-cased.
 */
function ucFirst(word) {
    const head = word.slice(0, 1);
    const rest = word.slice(1);
    return `${head.toUpperCase()}${rest}`;
}
|
exports.ucFirst = ucFirst; |
|
/**
 * Lower-case the first character of a word and leave the rest untouched.
 *
 * @param {string} word - the word to convert.
 * @returns {string} the word with its first character lower-cased.
 */
function lcFirst(word) {
    const head = word.slice(0, 1);
    const rest = word.slice(1);
    return `${head.toLowerCase()}${rest}`;
}
|
exports.lcFirst = lcFirst; |
|
/**
 * Convert a snake_case word by splitting on `_`, applying `ucFirst` to every
 * segment (including the first) and joining the segments with no separator.
 *
 * @param {string} word - the snake_case word.
 * @returns {string} the joined, capitalised segments.
 */
function snakeToCamel(word) {
    let joined = '';
    for (const segment of word.split('_')) {
        joined += ucFirst(segment);
    }
    return joined;
}
|
exports.snakeToCamel = snakeToCamel; |
|
/**
 * Convert a camelCase word to snake_case: split it with `splitCamelCaseWord`,
 * join the parts with `_` and lower-case the result.
 *
 * @param {string} word - the camelCase word.
 * @returns {string} the lower-cased, underscore-joined word.
 */
function camelToSnake(word) {
    const parts = splitCamelCaseWord(word);
    const snake = parts.join('_');
    return snake.toLowerCase();
}
|
exports.camelToSnake = camelToSnake; |
|
/**
 * Re-case `word` so it follows the casing pattern of `example`.
 *
 * The rules are tried in order and the first one that applies wins:
 *  1. example matches `regExFirstUpper` -> first character upper-cased, rest lower-cased;
 *  2. example matches `regExAllLower`   -> whole word lower-cased;
 *  3. example matches `regExAllUpper`   -> whole word upper-cased;
 *  4. example starts with an uppercase character -> `ucFirst(word)`;
 *  5. example starts with a lowercase character  -> `lcFirst(word)`.
 * When no rule applies the word is returned unchanged.
 *
 * @param {string} example - the word whose casing is imitated.
 * @param {string} word - the word to re-case.
 * @returns {string} the re-cased word.
 */
function matchCase(example, word) {
    // Both halves of each rule are thunks so nothing is evaluated before it is needed.
    const rules = [
        [() => example.match(regExFirstUpper), () => word.slice(0, 1).toUpperCase() + word.slice(1).toLowerCase()],
        [() => example.match(regExAllLower), () => word.toLowerCase()],
        [() => example.match(regExAllUpper), () => word.toUpperCase()],
        [() => isFirstCharacterUpper(example), () => ucFirst(word)],
        [() => isFirstCharacterLower(example), () => lcFirst(word)],
    ];
    for (const [applies, convert] of rules) {
        if (applies()) {
            return convert();
        }
    }
    return word;
}
|
exports.matchCase = matchCase; |
|
/**
 * Bundle a text and its offset into a single object.
 *
 * @param {string} text - the text.
 * @param {number} [offset=0] - where the text starts; defaults to 0.
 * @returns {{ text: string, offset: number }} the bundled pair.
 */
function textOffset(text, offset = 0) {
    const bundled = { text: text, offset: offset };
    return bundled;
}
|
exports.textOffset = textOffset; |
|
/**
 * Cut the span between two positions out of a text that carries an offset.
 *
 * Both positions are given in the same coordinate space as the text's offset.
 * They are converted to indices into the text and clamped so they never go below 0.
 *
 * @param {{ text: string, offset: number }} textOffset - the text and its offset.
 * @param {number} startPos - position of the first character to include.
 * @param {number} endPos - position just past the last character to include.
 * @returns {string} the extracted text.
 */
function extractText(textOffset, startPos, endPos) {
    const content = textOffset.text;
    const base = textOffset.offset;
    const toLocalIndex = (pos) => Math.max(pos - base, 0);
    return content.slice(toLocalIndex(startPos), toLocalIndex(endPos));
}
|
exports.extractText = extractText; |
|
/**
 * Create a mapper that shifts the `offset` of an object by a fixed amount.
 *
 * @param {number} offset - the amount added to every object's offset.
 * @returns a function that returns a copy of its argument with `offset` increased.
 */
function offsetMap(offset) {
    return (xo) => {
        const shifted = xo.offset + offset;
        return { ...xo, offset: shifted };
    };
}
|
/**
 * Turn a pattern into a RegExp.
 *
 * - A RegExp is returned as-is.
 * - A string written as `/pattern/flags` (see `regExMatchRegExParts`) uses its own pattern and flags.
 * - Any other string is used as the pattern together with `defaultFlags`.
 * `forceFlags` are always added; duplicate flags are removed and anything other than
 * g, i, m, u, y is dropped.
 *
 * @param {string|RegExp} pattern - the pattern to convert.
 * @param {string} [defaultFlags='gim'] - flags used when the string carries none of its own.
 * @param {string} [forceFlags='g'] - flags that are always applied.
 * @returns {RegExp|undefined} the expression, or undefined when the pattern is empty
 *          or the conversion throws.
 */
function stringToRegExp(pattern, defaultFlags = 'gim', forceFlags = 'g') {
    if (pattern instanceof RegExp) {
        return pattern;
    }
    try {
        const parts = pattern.match(regExMatchRegExParts);
        const source = parts ? parts[1] : pattern;
        const requestedFlags = parts ? parts[2] : defaultFlags;
        // Make sure the flags are unique.
        const flags = Array.from(new Set(forceFlags + requestedFlags))
            .join('')
            .replace(/[^gimuy]/g, '');
        if (!source) {
            return undefined;
        }
        return new RegExp(source, flags);
    } catch (e) {
        // Deliberate best effort: anything that cannot be converted yields undefined.
    }
    return undefined;
}
|
exports.stringToRegExp = stringToRegExp; |
|
/**
 * Annotate word offsets with their row and column inside a document.
 *
 * A table of line boundaries is built once: -1, the index of every `\n`, and the
 * document length. Each offset is looked up in that table with `search_1.binarySearch`,
 * and the column is the distance from the boundary just before the returned row.
 * NOTE(review): with the -1 sentinel, row and col look 1-based - confirm against
 * the exact semantics of `binarySearch` in ./search.
 *
 * @param uri - identifier of the document; copied onto every result.
 * @param {string} doc - the document text; copied onto every result.
 * @param wordOffsets - array of objects that carry an `offset` into `doc`.
 * @returns a copy of every entry extended with `row`, `col`, `doc` and `uri`.
 */
function calculateTextDocumentOffsets(uri, doc, wordOffsets) {
    const newlineOffsets = match(/\n/g, doc).map((m) => m.index);
    const lines = [-1, ...newlineOffsets, doc.length];
    const locate = (offset) => {
        const row = search_1.binarySearch(lines, offset);
        const previousBoundary = lines[Math.max(0, row - 1)];
        return { row, col: offset - previousBoundary };
    };
    return wordOffsets.map((wo) => {
        const position = locate(wo.offset);
        return { ...wo, ...position, doc, uri };
    });
}
|
exports.calculateTextDocumentOffsets = calculateTextDocumentOffsets; |
|
/**
 * Strip accents from a text: normalise it to NFKD and remove every
 * character matched by `regExAccents`.
 *
 * @param {string} text - the text to clean.
 * @returns {string} the text without the matched mark characters.
 */
function removeAccents(text) {
    const decomposed = text.normalize('NFKD');
    return decomposed.replace(regExAccents, '');
}
|
exports.removeAccents = removeAccents; |
|
//# sourceMappingURL=text.js.map
|