"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.__testMethods = exports.hasOptionToSearchOption = exports.createSpellingDictionaryTrie = exports.SpellingDictionaryFromTrie = exports.createSpellingDictionary = exports.defaultNumSuggestions = exports.regexPrefix = exports.PREFIX_NO_CASE = exports.WORD_SEPARATOR = exports.suggestionCollector = exports.JOIN_SEPARATOR = exports.CompoundWordsMethod = void 0; const gensequence_1 = require("gensequence"); const cspell_trie_lib_1 = require("cspell-trie-lib"); const repMap_1 = require("../util/repMap"); const Settings_1 = require("../Settings"); const text_1 = require("../util/text"); const Memorizer_1 = require("../util/Memorizer"); // cspell:word café var cspell_trie_lib_2 = require("cspell-trie-lib"); Object.defineProperty(exports, "CompoundWordsMethod", { enumerable: true, get: function () { return cspell_trie_lib_2.CompoundWordsMethod; } }); Object.defineProperty(exports, "JOIN_SEPARATOR", { enumerable: true, get: function () { return cspell_trie_lib_2.JOIN_SEPARATOR; } }); Object.defineProperty(exports, "suggestionCollector", { enumerable: true, get: function () { return cspell_trie_lib_2.suggestionCollector; } }); Object.defineProperty(exports, "WORD_SEPARATOR", { enumerable: true, get: function () { return cspell_trie_lib_2.WORD_SEPARATOR; } }); exports.PREFIX_NO_CASE = '>'; exports.regexPrefix = /^[>]/; exports.defaultNumSuggestions = 10; function createSpellingDictionary(wordList, name, source, options) { // console.log(`createSpellingDictionary ${name} ${source}`); const opts = options || {}; const { caseSensitive = false } = opts; const words = new Set(gensequence_1.genSequence(wordList) .filter((word) => typeof word === 'string') .map((word) => word.trim()) .filter((w) => !!w) .concatMap(wordDictionaryFormsCollector(caseSensitive))); const mapWord = repMap_1.createMapper(opts.repMap || []); let trieDict; function getTrie() { if (trieDict) { return trieDict; } // console.log(`Build Trie ${name}`); return (trieDict = new SpellingDictionaryFromTrie(cspell_trie_lib_1.Trie.create(words), name, options, source, words.size)); } const isDictionaryCaseSensitive = opts.caseSensitive || false; const dict = { name, source, type: 'SpellingDictionaryFromSet', mapWord, size: words.size, isDictionaryCaseSensitive, options: opts, has: (word, hasOptions) => { if (words.has(word)) { return true; } const searchOptions = hasOptionToSearchOption(hasOptions); const mWord = mapWord(word); const { ignoreCase = true } = searchOptions; const forms = wordSearchForms(mWord, isDictionaryCaseSensitive, ignoreCase); for (const w of forms) { if (words.has(w)) { return true; } } const useCompounds = searchOptions.useCompounds === undefined ? opts.useCompounds : searchOptions.useCompounds; if (isDictionaryCaseSensitive || useCompounds || searchOptions.ignoreCase === false) { return getTrie().has(word, hasOptions); } return false; }, suggest: (...args) => getTrie().suggest(...args), genSuggestions: (collector, suggestOptions) => getTrie().genSuggestions(collector, suggestOptions), }; return dict; } exports.createSpellingDictionary = createSpellingDictionary; class SpellingDictionaryFromTrie { constructor(trie, name, options = {}, source = 'from trie', size) { this.trie = trie; this.name = name; this.options = options; this.source = source; this._size = 0; this.knownWords = new Set(); this.unknownWords = new Set(); this.type = 'SpellingDictionaryFromTrie'; this._has = Memorizer_1.memorizer((word, useCompounds, ignoreCase) => this.hasAnyForm(word, useCompounds, ignoreCase), SpellingDictionaryFromTrie.cachedWordsLimit); trie.root.f = 0; this.mapWord = repMap_1.createMapper(options.repMap || []); this.isDictionaryCaseSensitive = options.caseSensitive || false; this._size = size || 0; } get size() { if (!this._size) { // walk the trie and get the approximate size. const i = this.trie.iterate(); let deeper = true; let size = 0; for (let r = i.next(); !r.done; r = i.next(deeper)) { // count all nodes even though they are not words. // because we are not going to all the leaves, this should give a good enough approximation. size += 1; deeper = r.value.text.length < 5; } this._size = size; } return this._size; } has(word, hasOptions) { const searchOptions = hasOptionToSearchOption(hasOptions); const useCompounds = searchOptions.useCompounds === undefined ? this.options.useCompounds : searchOptions.useCompounds; const { ignoreCase = true } = searchOptions; return this._has(word, useCompounds, ignoreCase); } hasAnyForm(word, useCompounds, ignoreCase) { const mWord = this.mapWord(word); const forms = wordSearchForms(mWord, this.isDictionaryCaseSensitive, ignoreCase); for (const w of forms) { if (this.trie.has(w, false)) { return true; } } if (useCompounds) { for (const w of forms) { if (this.trie.has(w, useCompounds)) { return true; } } } return false; } suggest(...args) { const [word, options, compoundMethod, numChanges] = args; const suggestOptions = typeof options === 'object' ? options : { numSuggestions: options, compoundMethod, numChanges, }; return this._suggest(word, suggestOptions); } _suggest(word, suggestOptions) { const { numSuggestions = Settings_1.getDefaultSettings().numSuggestions || exports.defaultNumSuggestions, numChanges, ignoreCase = true, } = suggestOptions; function filter(word) { return ignoreCase || word[0] !== exports.PREFIX_NO_CASE; } const collector = cspell_trie_lib_1.suggestionCollector(word, numSuggestions, filter, numChanges); this.genSuggestions(collector, suggestOptions); return collector.suggestions.map((r) => ({ ...r, word: r.word.replace(exports.regexPrefix, '') })); } genSuggestions(collector, suggestOptions) { const { compoundMethod = cspell_trie_lib_1.CompoundWordsMethod.SEPARATE_WORDS, ignoreCase = true } = suggestOptions; const _compoundMethod = this.options.useCompounds ? cspell_trie_lib_1.CompoundWordsMethod.JOIN_WORDS : compoundMethod; wordSearchForms(collector.word, this.isDictionaryCaseSensitive, ignoreCase).forEach((w) => this.trie.genSuggestions(impersonateCollector(collector, w), _compoundMethod)); } } exports.SpellingDictionaryFromTrie = SpellingDictionaryFromTrie; SpellingDictionaryFromTrie.cachedWordsLimit = 50000; function impersonateCollector(collector, word) { return { collect: collector.collect, add: (suggestion) => collector.add(suggestion), get suggestions() { return collector.suggestions; }, get maxCost() { return collector.maxCost; }, get word() { return word; }, get maxNumSuggestions() { return collector.maxNumSuggestions; }, }; } function wordSearchForms(word, isDictionaryCaseSensitive, ignoreCase) { // if (!isDictionaryCaseSensitive) { // return [word.toLowerCase()]; // } word = word.normalize('NFC'); const wordLc = word.toLowerCase(); const wordNa = text_1.removeAccents(word); const wordLcNa = text_1.removeAccents(wordLc); const forms = new Set(); function add(w, prefix = '') { forms.add(prefix + w); } if (!isDictionaryCaseSensitive) { add(wordLc); } add(word); // HOUSE -> House, house if (text_1.isUpperCase(word)) { add(wordLc); add(text_1.ucFirst(wordLc)); } if (!isDictionaryCaseSensitive) { add(wordLc); add(wordNa); add(wordLcNa); return [...forms]; } // House -> house if (word === text_1.ucFirst(wordLc)) { add(wordLc); } // Café -> >café, >cafe if (ignoreCase) { add(wordNa, exports.PREFIX_NO_CASE); add(wordLcNa, exports.PREFIX_NO_CASE); if (text_1.isUpperCase(word)) { add(text_1.ucFirst(wordLcNa), exports.PREFIX_NO_CASE); } } return [...forms]; } function* wordDictionaryForms(word, isDictionaryCaseSensitive) { word = word.normalize('NFC'); const wordLc = word.toLowerCase(); const wordNa = text_1.removeAccents(word); const wordLcNa = text_1.removeAccents(wordLc); function wf(w, p = '') { return { w, p }; } const prefix = isDictionaryCaseSensitive ? exports.PREFIX_NO_CASE : ''; yield wf(word); yield wf(wordNa, prefix); yield wf(wordLc, prefix); yield wf(wordLcNa, prefix); } function wordDictionaryFormsCollector(isDictionaryCaseSensitive) { const knownWords = new Set(); return (word) => { return gensequence_1.genSequence(wordDictionaryForms(word, isDictionaryCaseSensitive)) .filter((w) => !knownWords.has(w.w)) .map((w) => w.p + w.w) .filter((w) => !knownWords.has(w)) .map((w) => (knownWords.add(w), w)); }; } async function createSpellingDictionaryTrie(data, name, source, options) { const trieNode = cspell_trie_lib_1.importTrie(data); const trie = new cspell_trie_lib_1.Trie(trieNode); return new SpellingDictionaryFromTrie(trie, name, options, source); } exports.createSpellingDictionaryTrie = createSpellingDictionaryTrie; function hasOptionToSearchOption(opt) { return !opt ? {} : typeof opt === 'object' ? opt : { useCompounds: opt }; } exports.hasOptionToSearchOption = hasOptionToSearchOption; exports.__testMethods = { wordSearchForms, wordDictionaryForms, wordDictionaryFormsCollector, }; //# sourceMappingURL=SpellingDictionary.js.map