You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
252 lines
10 KiB
252 lines
10 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.__testMethods = exports.hasOptionToSearchOption = exports.createSpellingDictionaryTrie = exports.SpellingDictionaryFromTrie = exports.createSpellingDictionary = exports.defaultNumSuggestions = exports.regexPrefix = exports.PREFIX_NO_CASE = exports.WORD_SEPARATOR = exports.suggestionCollector = exports.JOIN_SEPARATOR = exports.CompoundWordsMethod = void 0; |
|
const gensequence_1 = require("gensequence"); |
|
const cspell_trie_lib_1 = require("cspell-trie-lib"); |
|
const repMap_1 = require("../util/repMap"); |
|
const Settings_1 = require("../Settings"); |
|
const text_1 = require("../util/text"); |
|
const Memorizer_1 = require("../util/Memorizer"); |
|
// cspell:word café |
|
var cspell_trie_lib_2 = require("cspell-trie-lib"); |
|
Object.defineProperty(exports, "CompoundWordsMethod", { enumerable: true, get: function () { return cspell_trie_lib_2.CompoundWordsMethod; } }); |
|
Object.defineProperty(exports, "JOIN_SEPARATOR", { enumerable: true, get: function () { return cspell_trie_lib_2.JOIN_SEPARATOR; } }); |
|
Object.defineProperty(exports, "suggestionCollector", { enumerable: true, get: function () { return cspell_trie_lib_2.suggestionCollector; } }); |
|
Object.defineProperty(exports, "WORD_SEPARATOR", { enumerable: true, get: function () { return cspell_trie_lib_2.WORD_SEPARATOR; } }); |
|
// Marker prepended to the lower-cased / accent-stripped word forms when the dictionary is case sensitive
// (see wordDictionaryForms and wordSearchForms), so they stay distinct from the exact-case entries.
exports.PREFIX_NO_CASE = '>'; |
|
// Matches a leading PREFIX_NO_CASE marker; used to strip the marker from suggestions before returning them.
exports.regexPrefix = /^[>]/; |
|
// Suggestion count used when neither the caller nor the default settings provide `numSuggestions`.
exports.defaultNumSuggestions = 10; |
|
/**
 * Create a spelling dictionary backed by an in-memory Set of word forms.
 *
 * Every string in `wordList` is trimmed, empty entries are dropped, and each remaining word is
 * expanded by `wordDictionaryFormsCollector` before being stored. A trie-based dictionary is only
 * built on demand: the first time a lookup cannot be answered from the Set, or when suggestions
 * are requested.
 *
 * @param wordList - iterable of candidate words; non-string entries are ignored.
 * @param name - dictionary name, exposed as `name`.
 * @param source - origin of the words, exposed as `source`.
 * @param [options] - settings; `caseSensitive`, `repMap` and `useCompounds` are read here.
 *                    `null` / `undefined` are treated as `{}`.
 * @returns a dictionary object exposing `has`, `suggest`, `genSuggestions` and descriptive fields.
 */
function createSpellingDictionary(wordList, name, source, options) {
    // Normalize once. Everything below -- including the lazily built trie -- must use `opts`,
    // because a caller-supplied `null` would otherwise reach code that dereferences it.
    const opts = options || {};
    const isDictionaryCaseSensitive = opts.caseSensitive || false;
    const words = new Set(gensequence_1.genSequence(wordList)
        .filter((word) => typeof word === 'string')
        .map((word) => word.trim())
        .filter((w) => !!w)
        .concatMap(wordDictionaryFormsCollector(isDictionaryCaseSensitive)));
    const mapWord = repMap_1.createMapper(opts.repMap || []);
    let trieDict;
    // Build the trie-backed dictionary on first use and reuse it afterwards.
    function getTrie() {
        if (!trieDict) {
            trieDict = new SpellingDictionaryFromTrie(cspell_trie_lib_1.Trie.create(words), name, opts, source, words.size);
        }
        return trieDict;
    }
    const dict = {
        name,
        source,
        type: 'SpellingDictionaryFromSet',
        mapWord,
        size: words.size,
        isDictionaryCaseSensitive,
        options: opts,
        has: (word, hasOptions) => {
            // Fast path: the word is stored exactly as given.
            if (words.has(word)) {
                return true;
            }
            const searchOptions = hasOptionToSearchOption(hasOptions);
            const { ignoreCase = true } = searchOptions;
            const forms = wordSearchForms(mapWord(word), isDictionaryCaseSensitive, ignoreCase);
            if (forms.some((form) => words.has(form))) {
                return true;
            }
            const useCompounds = searchOptions.useCompounds === undefined ? opts.useCompounds : searchOptions.useCompounds;
            // Only these cases are delegated to the trie; otherwise the Set lookup above is final.
            if (isDictionaryCaseSensitive || useCompounds || searchOptions.ignoreCase === false) {
                return getTrie().has(word, hasOptions);
            }
            return false;
        },
        suggest: (...args) => getTrie().suggest(...args),
        genSuggestions: (collector, suggestOptions) => getTrie().genSuggestions(collector, suggestOptions),
    };
    return dict;
}
|
exports.createSpellingDictionary = createSpellingDictionary; |
|
/**
 * Spelling dictionary whose lookups and suggestions are answered by a trie.
 */
class SpellingDictionaryFromTrie {
    /**
     * @param trie - trie holding the dictionary words.
     * @param name - dictionary name.
     * @param [options] - settings; `repMap`, `caseSensitive` and `useCompounds` are read.
     *                    `null` / `undefined` are treated as `{}`.
     * @param [source] - origin of the dictionary, defaults to `'from trie'`.
     * @param [size] - known number of words; when missing or 0 the size is estimated lazily.
     */
    constructor(trie, name, options = {}, source = 'from trie', size) {
        // A default parameter only replaces `undefined`; also tolerate an explicit `null`.
        const opts = options || {};
        this.trie = trie;
        this.name = name;
        this.options = opts;
        this.source = source;
        this._size = size || 0;
        this.knownWords = new Set();
        this.unknownWords = new Set();
        this.type = 'SpellingDictionaryFromTrie';
        // Lookups go through `memorizer`; `cachedWordsLimit` is passed as its second argument
        // (presumably the cache capacity -- confirm against ../util/Memorizer).
        this._has = Memorizer_1.memorizer((word, useCompounds, ignoreCase) => this.hasAnyForm(word, useCompounds, ignoreCase), SpellingDictionaryFromTrie.cachedWordsLimit);
        // NOTE(review): resets the `f` field of the trie's root node; the reason is not visible in this file.
        trie.root.f = 0;
        this.mapWord = repMap_1.createMapper(opts.repMap || []);
        this.isDictionaryCaseSensitive = opts.caseSensitive || false;
    }
    /**
     * Number of words, or an approximation of it when no size was supplied.
     * The estimate is computed on first access and cached while it is non-zero.
     */
    get size() {
        if (!this._size) {
            // Walk the trie and count nodes (not only complete words). Branches are not followed
            // past 5 characters, so the result is an approximation rather than an exact count.
            const walker = this.trie.iterate();
            let goDeeper = true;
            let count = 0;
            for (let step = walker.next(); !step.done; step = walker.next(goDeeper)) {
                count += 1;
                goDeeper = step.value.text.length < 5;
            }
            this._size = count;
        }
        return this._size;
    }
    /**
     * Test whether `word` is in the dictionary.
     * @param word - word to look up.
     * @param [hasOptions] - either a search-options object or a bare `useCompounds` value.
     * @returns true when any search form of the word is found.
     */
    has(word, hasOptions) {
        const searchOptions = hasOptionToSearchOption(hasOptions);
        const useCompounds = searchOptions.useCompounds === undefined ? this.options.useCompounds : searchOptions.useCompounds;
        const { ignoreCase = true } = searchOptions;
        return this._has(word, useCompounds, ignoreCase);
    }
    /**
     * Uncached lookup: try every search form as a plain word first, then -- only if
     * `useCompounds` is set -- retry every form with compound matching enabled.
     */
    hasAnyForm(word, useCompounds, ignoreCase) {
        const mWord = this.mapWord(word);
        const forms = wordSearchForms(mWord, this.isDictionaryCaseSensitive, ignoreCase);
        for (const form of forms) {
            if (this.trie.has(form, false)) {
                return true;
            }
        }
        if (useCompounds) {
            for (const form of forms) {
                if (this.trie.has(form, useCompounds)) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Suggest replacements for a word. Accepts either
     * `(word, suggestOptions)` or the positional form `(word, numSuggestions, compoundMethod, numChanges)`.
     */
    suggest(...args) {
        const [word, options, compoundMethod, numChanges] = args;
        // `typeof null` is also 'object', so a null options argument is mapped to an empty options object.
        const suggestOptions = typeof options === 'object'
            ? options || {}
            : {
                numSuggestions: options,
                compoundMethod,
                numChanges,
            };
        return this._suggest(word, suggestOptions);
    }
    /**
     * Collect suggestions for `word` and strip the PREFIX_NO_CASE marker from the results.
     */
    _suggest(word, suggestOptions) {
        const { numSuggestions = Settings_1.getDefaultSettings().numSuggestions || exports.defaultNumSuggestions, numChanges, ignoreCase = true, } = suggestOptions;
        // When case matters, reject candidates that come from the prefixed (case-folded) entries.
        function filter(candidate) {
            return ignoreCase || candidate[0] !== exports.PREFIX_NO_CASE;
        }
        const collector = cspell_trie_lib_1.suggestionCollector(word, numSuggestions, filter, numChanges);
        this.genSuggestions(collector, suggestOptions);
        return collector.suggestions.map((r) => ({ ...r, word: r.word.replace(exports.regexPrefix, '') }));
    }
    /**
     * Feed suggestions for every search form of `collector.word` into `collector`.
     * @param collector - suggestion collector to fill.
     * @param [suggestOptions] - `compoundMethod` and `ignoreCase` are read; defaults to `{}`.
     */
    genSuggestions(collector, suggestOptions = {}) {
        const { compoundMethod = cspell_trie_lib_1.CompoundWordsMethod.SEPARATE_WORDS, ignoreCase = true } = suggestOptions;
        // A dictionary configured with `useCompounds` always joins words, whatever the caller asked for.
        const _compoundMethod = this.options.useCompounds ? cspell_trie_lib_1.CompoundWordsMethod.JOIN_WORDS : compoundMethod;
        for (const form of wordSearchForms(collector.word, this.isDictionaryCaseSensitive, ignoreCase)) {
            this.trie.genSuggestions(impersonateCollector(collector, form), _compoundMethod);
        }
    }
}
|
exports.SpellingDictionaryFromTrie = SpellingDictionaryFromTrie; |
|
SpellingDictionaryFromTrie.cachedWordsLimit = 50000; |
|
/**
 * Wrap a suggestion collector so that it reports `word` as its search word while every
 * other member is forwarded to the wrapped collector.
 *
 * @param collector - the collector to wrap.
 * @param word - value returned by the wrapper's `word` getter.
 * @returns an object exposing `collect`, `add`, `suggestions`, `maxCost`, `word` and `maxNumSuggestions`.
 */
function impersonateCollector(collector, word) {
    // Read-through accessor: always reflects the current value on the wrapped collector.
    const forward = (key) => ({
        enumerable: true,
        configurable: true,
        get: () => collector[key],
    });
    const facade = {
        collect: collector.collect,
        add: (suggestion) => collector.add(suggestion),
    };
    return Object.defineProperties(facade, {
        suggestions: forward('suggestions'),
        maxCost: forward('maxCost'),
        word: { enumerable: true, configurable: true, get: () => word },
        maxNumSuggestions: forward('maxNumSuggestions'),
    });
}
|
/**
 * List the spellings of `word` that should be tried when searching a dictionary.
 * The result has no duplicates and keeps the order in which the forms are generated.
 *
 * @param word - the word being looked up (NFC-normalized before use).
 * @param isDictionaryCaseSensitive - whether the target dictionary is case sensitive.
 * @param ignoreCase - for case-sensitive dictionaries, also try the PREFIX_NO_CASE entries.
 * @returns array of candidate spellings.
 */
function wordSearchForms(word, isDictionaryCaseSensitive, ignoreCase) {
    const normalized = word.normalize('NFC');
    const lowered = normalized.toLowerCase();
    const unaccented = text_1.removeAccents(normalized);
    const loweredUnaccented = text_1.removeAccents(lowered);
    const allCaps = text_1.isUpperCase(normalized);
    const candidates = [];
    if (!isDictionaryCaseSensitive) {
        candidates.push(lowered, normalized);
        // HOUSE -> House
        if (allCaps) {
            candidates.push(text_1.ucFirst(lowered));
        }
        candidates.push(unaccented, loweredUnaccented);
        return [...new Set(candidates)];
    }
    candidates.push(normalized);
    // HOUSE -> house, House
    if (allCaps) {
        candidates.push(lowered, text_1.ucFirst(lowered));
    }
    // House -> house
    if (normalized === text_1.ucFirst(lowered)) {
        candidates.push(lowered);
    }
    // Café -> >Cafe, >cafe
    if (ignoreCase) {
        const marker = exports.PREFIX_NO_CASE;
        candidates.push(marker + unaccented, marker + loweredUnaccented);
        if (allCaps) {
            candidates.push(marker + text_1.ucFirst(loweredUnaccented));
        }
    }
    return [...new Set(candidates)];
}
|
/**
 * Generate the forms under which `word` is stored in a dictionary.
 *
 * Yields `{ w, p }` pairs in this order: the NFC-normalized word (no prefix), its accent-stripped
 * form, its lower-case form, and its lower-case accent-stripped form. The last three carry
 * PREFIX_NO_CASE as `p` when the dictionary is case sensitive, otherwise an empty prefix.
 *
 * @param word - word to expand.
 * @param isDictionaryCaseSensitive - whether derived forms must be marked with the prefix.
 */
function* wordDictionaryForms(word, isDictionaryCaseSensitive) {
    const normalized = word.normalize('NFC');
    const lowered = normalized.toLowerCase();
    const derived = [
        text_1.removeAccents(normalized),
        lowered,
        text_1.removeAccents(lowered),
    ];
    const p = isDictionaryCaseSensitive ? exports.PREFIX_NO_CASE : '';
    yield { w: normalized, p: '' };
    for (const w of derived) {
        yield { w, p };
    }
}
|
/**
 * Create a function that expands a word into the dictionary entries not yet produced.
 *
 * The returned function remembers every entry it has emitted across calls. A form is skipped when
 * its bare word, or its prefixed entry, has already been emitted.
 *
 * @param isDictionaryCaseSensitive - forwarded to `wordDictionaryForms`.
 * @returns `(word) => sequence of new entries (prefix + word)`, evaluated lazily.
 */
function wordDictionaryFormsCollector(isDictionaryCaseSensitive) {
    const emitted = new Set();
    // Lazily walk the forms, recording each entry at the moment it is handed out.
    function* freshEntries(forms) {
        for (const { w, p } of forms) {
            if (emitted.has(w)) {
                continue;
            }
            const entry = p + w;
            if (emitted.has(entry)) {
                continue;
            }
            emitted.add(entry);
            yield entry;
        }
    }
    return (word) => gensequence_1.genSequence(freshEntries(wordDictionaryForms(word, isDictionaryCaseSensitive)));
}
|
/**
 * Build a trie-backed spelling dictionary from serialized trie data.
 *
 * @param data - serialized trie, passed to `importTrie`.
 * @param name - dictionary name.
 * @param source - origin of the dictionary.
 * @param options - dictionary options forwarded to `SpellingDictionaryFromTrie`.
 * @returns a promise resolving to the new `SpellingDictionaryFromTrie`.
 */
async function createSpellingDictionaryTrie(data, name, source, options) {
    const root = cspell_trie_lib_1.importTrie(data);
    // Note the argument order of the constructor: options come before source.
    return new SpellingDictionaryFromTrie(new cspell_trie_lib_1.Trie(root), name, options, source);
}
|
exports.createSpellingDictionaryTrie = createSpellingDictionaryTrie; |
|
/**
 * Normalize the second argument of `has` into a search-options object.
 *
 * @param opt - a search-options object, a bare `useCompounds` value, or nothing.
 * @returns `{}` for any falsy value, the object itself when one is given,
 *          otherwise `{ useCompounds: opt }`.
 */
function hasOptionToSearchOption(opt) {
    if (!opt) {
        return {};
    }
    if (typeof opt === 'object') {
        return opt;
    }
    return { useCompounds: opt };
}
|
exports.hasOptionToSearchOption = hasOptionToSearchOption; |
|
// Module-private helpers, exported under `__testMethods` so they can be reached from outside this module
// (the name suggests this is for unit tests only).
exports.__testMethods = { |
|
wordSearchForms, |
|
wordDictionaryForms, |
|
wordDictionaryFormsCollector, |
|
}; |
|
//# sourceMappingURL=SpellingDictionary.js.map
|