You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
251 lines
9.5 KiB
251 lines
9.5 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.suggestionCollector = exports.compSuggestionResults = exports.genCompoundableSuggestions = exports.genSuggestions = exports.suggest = exports.WORD_SEPARATOR = exports.JOIN_SEPARATOR = exports.CompoundWordsMethod = void 0; |
|
const util_1 = require("./util"); |
|
const walker_1 = require("./walker"); |
|
var walker_2 = require("./walker"); |
|
Object.defineProperty(exports, "CompoundWordsMethod", { enumerable: true, get: function () { return walker_2.CompoundWordsMethod; } }); |
|
Object.defineProperty(exports, "JOIN_SEPARATOR", { enumerable: true, get: function () { return walker_2.JOIN_SEPARATOR; } }); |
|
Object.defineProperty(exports, "WORD_SEPARATOR", { enumerable: true, get: function () { return walker_2.WORD_SEPARATOR; } }); |
|
const orthography_1 = require("./orthography"); |
|
/** Number of suggestions `suggest` keeps when the caller does not say otherwise. */
const defaultMaxNumberSuggestions = 10;

/** Cost unit used to seed the first matrix row and to derive the per-row edit cost. */
const baseCost = 100;

/** Intended total cost of swapping two adjacent letters. */
const swapCost = 75;

/**
 * Substitution cost applied on the second letter of a swap. It is negative so
 * that, added to the edit already charged for the first letter, the pair
 * totals roughly `swapCost`.
 */
const postSwapCost = swapCost - baseCost;

/** Default cap on the number of edits; the cost limit never exceeds `baseCost * maxNumChanges`. */
const maxNumChanges = 5;

/** Adjustment added to the insert cost when the inserted letter is a separator. */
const insertSpaceCost = -1;

/** Substitution cost when both letters map to the same `visualLetterMap` group. */
const mapSubCost = 10;

/** The two separator characters exported by the walker module. */
const setOfSeparators = new Set([walker_1.JOIN_SEPARATOR, walker_1.WORD_SEPARATOR]);

/** Collator used as the final tie-breaker when ordering suggestions. */
const collator = new Intl.Collator();

/** Global pattern matching either separator; used to split a suggestion into its parts. */
const regexSeparator = new RegExp(`${regexQuote(walker_1.JOIN_SEPARATOR)}|${regexQuote(walker_1.WORD_SEPARATOR)}`, 'g');

/** Extra cost indexed by the length of a suggestion part: 1-letter parts cost 50, 2-letter parts 25. */
const wordLengthCost = [0, 50, 25, 0];
|
/**
 * Produce the sorted list of suggestions for `word`.
 *
 * @param root trie root handed to the suggestion generator.
 * @param word the word to find suggestions for.
 * @param maxNumSuggestions maximum number of suggestions to keep.
 * @param compoundMethod compounding mode forwarded to the generator.
 * @param numChanges edit cap forwarded to the collector as its change limit.
 * @returns the collector's suggestions, best first.
 */
function suggest(root, word, maxNumSuggestions = defaultMaxNumberSuggestions, compoundMethod = walker_1.CompoundWordsMethod.NONE, numChanges = maxNumChanges) {
    // `undefined` selects the collector's default filter.
    const results = suggestionCollector(word, maxNumSuggestions, undefined, numChanges);
    const candidates = genSuggestions(root, word, compoundMethod);
    results.collect(candidates);
    return results.suggestions;
}
|
exports.suggest = suggest; |
|
/**
 * Generate suggestions for `word` by delegating to `genCompoundableSuggestions`.
 *
 * Delegation uses `yield*`, so any value the consumer passes to `next()` is
 * forwarded to the inner generator.
 *
 * @param root trie root to search.
 * @param word the word to find suggestions for.
 * @param compoundMethod compounding mode; defaults to `CompoundWordsMethod.NONE`.
 */
function* genSuggestions(root, word, compoundMethod = walker_1.CompoundWordsMethod.NONE) {
    const compoundable = genCompoundableSuggestions(root, word, compoundMethod);
    yield* compoundable;
}
|
exports.genSuggestions = genSuggestions; |
|
/**
 * Walk the trie and yield `{ word, cost }` suggestions for `word`.
 *
 * An edit-cost matrix is kept with one row per trie depth (row 0 is the empty
 * prefix) and one column per letter of `' ' + word`. Only the column window
 * `[lo, hi]` that can still stay within the cost limit is evaluated per row.
 *
 * The consumer may pass a new cost limit to `next()`; any truthy value
 * replaces the current limit.
 *
 * @param root trie root handed to `hintedWalker`.
 * @param word the word to find suggestions for.
 * @param compoundMethod compounding mode handed to `hintedWalker`.
 */
function* genCompoundableSuggestions(root, word, compoundMethod) {
    const emitted = []; // every word-termination result yielded so far, in order
    const separatorStates = new Map(); // matrix-window signature -> state when first seen
    const rows = [[]];
    const ranges = [];
    const padded = ' ' + word;
    const lastCol = padded.length - 1;
    const separatorCosts = {
        [walker_1.WORD_SEPARATOR]: insertSpaceCost,
        [walker_1.JOIN_SEPARATOR]: insertSpaceCost,
    };
    let costLimit = Math.min((baseCost * word.length) / 2, baseCost * maxNumChanges);

    // Cost of replacing `wordLetter` with `sugLetter`: free when equal, cheap
    // when both fall in the same visual group, discounted when it completes a
    // swap of adjacent letters, otherwise the regular edit cost.
    const substitutionCost = (sugLetter, sugGroup, prevSugLetter, wordLetter, prevWordLetter, editCost) => {
        if (sugLetter === wordLetter) {
            return 0;
        }
        const wordGroup = orthography_1.visualLetterMap.get(wordLetter) || -2;
        if (sugGroup === wordGroup) {
            return mapSubCost;
        }
        const completesSwap = wordLetter === prevSugLetter && sugLetter === prevWordLetter;
        return completesSwap ? postSwapCost : editCost;
    };

    // Seed row 0 up to and including the first column that exceeds the limit.
    let seededEnd = 0;
    for (let col = 0, colCost = 0; col <= lastCol && colCost <= costLimit; ++col) {
        colCost = col * baseCost;
        rows[0][col] = colCost;
        seededEnd = col;
    }
    ranges[0] = { lo: 0, hi: seededEnd };

    const walker = walker_1.hintedWalker(root, compoundMethod, word.slice(0));
    let goDeeper = true;
    for (let step = walker.next({ goDeeper }); !step.done; step = walker.next({ goDeeper })) {
        const { text, node, depth } = step.value;
        let { lo, hi } = ranges[depth];
        const sugLetter = text.slice(-1);
        const sugGroup = orthography_1.visualLetterMap.get(sugLetter) || -1;

        if (setOfSeparators.has(sugLetter)) {
            // Signature of the parent row's window, normalized by its minimum.
            const window = rows[depth].slice(lo, hi + 1);
            const floor = Math.min(...window);
            const tag = [lo, ...window.map((value) => value - floor)].join();
            const seen = separatorStates.get(tag);
            if (seen && seen.floor <= floor) {
                // Same state already explored at no greater cost: replay the
                // results recorded under the earlier prefix instead of
                // descending again.
                goDeeper = false;
                const { prefix, start } = seen;
                const shift = floor - seen.floor;
                for (let p = start; p < emitted.length; ++p) {
                    const { word: prior, cost: priorCost } = emitted[p];
                    if (!prior.startsWith(prefix)) {
                        break;
                    }
                    const cost = priorCost + shift;
                    if (cost <= costLimit) {
                        const replayed = text + prior.slice(prefix.length);
                        costLimit = (yield { word: replayed, cost }) || costLimit;
                    }
                }
                continue;
            }
            separatorStates.set(tag, { prefix: text, start: emitted.length, floor });
        }

        const row = depth + 1;
        const prevSugLetter = row > 1 ? text[row - 2] : '';
        const editCost = baseCost - row;
        const insertCost = editCost + (separatorCosts[sugLetter] || 0);
        const above = rows[depth];
        const current = rows[row] || [];
        rows[row] = current;

        // First column of the window.
        current[lo] = above[lo] + insertCost + row - lo;
        let prevWordLetter = padded[lo];
        let rowMin = current[lo];

        // Interior columns: substitute, insert, or delete.
        for (let col = lo + 1; col <= hi; ++col) {
            const wordLetter = padded[col];
            const sub = substitutionCost(sugLetter, sugGroup, prevSugLetter, wordLetter, prevWordLetter, editCost);
            const best = Math.min(above[col - 1] + sub, above[col] + insertCost, current[col - 1] + editCost);
            rowMin = Math.min(rowMin, best);
            current[col] = best;
            prevWordLetter = wordLetter;
        }

        // One extra column past the window, if the word has one. The insert
        // option is not considered here (the row above is not read at this column).
        if (hi < lastCol) {
            hi += 1;
            const wordLetter = padded[hi];
            const sub = substitutionCost(sugLetter, sugGroup, prevSugLetter, wordLetter, prevWordLetter, editCost);
            const best = Math.min(above[hi - 1] + sub, current[hi - 1] + editCost);
            rowMin = Math.min(rowMin, best);
            current[hi] = best;
        }

        // Shrink the window from both ends while the edge exceeds the limit,
        // then widen the right edge by one column for the next row.
        while (hi > lo && current[hi] > costLimit) {
            hi -= 1;
        }
        while (lo < hi && current[lo] > costLimit) {
            lo += 1;
        }
        hi = Math.min(hi + 1, lastCol);
        ranges[row] = { lo, hi };

        const cost = current[hi];
        if (node.f && util_1.isWordTerminationNode(node) && cost <= costLimit) {
            const found = { word: text, cost };
            emitted.push(found);
            costLimit = (yield found) || costLimit;
        }
        goDeeper = rowMin <= costLimit;
    }
}
|
exports.genCompoundableSuggestions = genCompoundableSuggestions; |
|
/**
 * Comparison function for suggestion results, suitable for `Array.prototype.sort`.
 *
 * Orders by ascending cost, then by ascending word length, then by the
 * collator's ordering of the words.
 *
 * @param a first suggestion (`{ word, cost }`).
 * @param b second suggestion (`{ word, cost }`).
 * @returns negative, zero, or positive as `a` sorts before, with, or after `b`.
 */
function compSuggestionResults(a, b) {
    const byCost = a.cost - b.cost;
    if (byCost) {
        return byCost;
    }
    const byLength = a.word.length - b.word.length;
    if (byLength) {
        return byLength;
    }
    return collator.compare(a.word, b.word);
}
|
exports.compSuggestionResults = compSuggestionResults; |
|
/**
 * Create a collector that keeps the best `maxNumSuggestions` suggestions for
 * `wordToMatch` and tracks the highest cost still worth considering.
 *
 * @param wordToMatch the word suggestions are being collected for.
 * @param maxNumSuggestions maximum number of suggestions to retain.
 * @param filter predicate on the suggested word; suggestions failing it are dropped.
 * @param changeLimit edit cap; the initial cost limit never exceeds `baseCost * changeLimit`.
 * @returns an object exposing `collect`, `add`, and the read-only properties
 *   `suggestions`, `maxCost`, `word` and `maxNumSuggestions`.
 */
function suggestionCollector(wordToMatch, maxNumSuggestions, filter = () => true, changeLimit = maxNumChanges) {
    const kept = new Map();
    let maxCost = Math.min((baseCost * wordToMatch.length) / 2, baseCost * changeLimit);

    // Retained suggestions, best first.
    const ranked = () => [...kept.values()].sort(compSuggestionResults);

    // Remove the worst retained suggestion and lower the cost limit to the
    // cost of the one that is now worst.
    const evictWorst = () => {
        if (kept.size < 2) {
            kept.clear();
            return;
        }
        const order = ranked();
        const worst = order[order.length - 1];
        const runnerUp = order[order.length - 2];
        kept.delete(worst.word);
        maxCost = runnerUp.cost;
    };

    // Sum of the length-based costs of each separator-delimited part of `text`.
    const lengthPenalty = (text) => {
        let penalty = 0;
        for (const part of text.split(regexSeparator)) {
            penalty += wordLengthCost[part.length] || 0;
        }
        return penalty;
    };

    // Offer one suggestion to the collector; returns the current cost limit.
    const consider = (suggestion) => {
        const word = suggestion.word;
        const cost = suggestion.cost + lengthPenalty(word);
        if (!(cost <= maxCost) || !filter(word)) {
            return maxCost;
        }
        const known = kept.get(word);
        if (kept.has(word)) {
            known.cost = Math.min(known.cost, cost);
            return maxCost;
        }
        kept.set(word, { word, cost });
        if (kept.size > maxNumSuggestions) {
            evictWorst();
        }
        return maxCost;
    };

    // Drain a suggestion generator, feeding the current cost limit back on every step.
    const collect = (src) => {
        for (let step = src.next(maxCost); !step.done; step = src.next(maxCost)) {
            if (step.value !== undefined) {
                consider(step.value);
            }
        }
    };

    return {
        collect,
        add(suggestion) {
            consider(suggestion);
            return this;
        },
        get suggestions() {
            return ranked();
        },
        get maxCost() {
            return maxCost;
        },
        get word() {
            return wordToMatch;
        },
        get maxNumSuggestions() {
            return maxNumSuggestions;
        },
    };
}
|
exports.suggestionCollector = suggestionCollector; |
|
/**
 * Escape `text` so it can be embedded in a regular expression and match
 * itself literally.
 *
 * The replacement uses `$&` (the matched substring). The pattern has no
 * capture group, so a `$1` replacement would be inserted as the literal
 * characters `$1` instead of the character being escaped.
 *
 * @param text verbatim text to be inserted into a regexp
 * @returns text that can be used in a regexp (built without the `u` flag).
 */
function regexQuote(text) {
    return text.replace(/[[\]\-+(){},|*.\\?^$]/g, '\\$&');
}
|
//# sourceMappingURL=suggest.js.map
|