d11 theme
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

251 lines
9.5 KiB

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.suggestionCollector = exports.compSuggestionResults = exports.genCompoundableSuggestions = exports.genSuggestions = exports.suggest = exports.WORD_SEPARATOR = exports.JOIN_SEPARATOR = exports.CompoundWordsMethod = void 0;
const util_1 = require("./util");
const walker_1 = require("./walker");
var walker_2 = require("./walker");
Object.defineProperty(exports, "CompoundWordsMethod", { enumerable: true, get: function () { return walker_2.CompoundWordsMethod; } });
Object.defineProperty(exports, "JOIN_SEPARATOR", { enumerable: true, get: function () { return walker_2.JOIN_SEPARATOR; } });
Object.defineProperty(exports, "WORD_SEPARATOR", { enumerable: true, get: function () { return walker_2.WORD_SEPARATOR; } });
const orthography_1 = require("./orthography");
// Default cap on how many suggestions `suggest` keeps.
const defaultMaxNumberSuggestions = 10;
// Base cost unit: row 0 of the edit matrix charges `i * baseCost`, and cost limits are expressed in multiples of it.
const baseCost = 100;
const swapCost = 75;
// swapCost - baseCost (negative). Used as the substitution cost when the current pair of letters
// looks like a transposition of the previous pair (see the `psc` branch in genCompoundableSuggestions).
const postSwapCost = swapCost - baseCost;
// Default multiplier for the cost ceiling: `baseCost * maxNumChanges`.
const maxNumChanges = 5;
// Adjustment added to the insert cost when the inserted character is one of the separators.
const insertSpaceCost = -1;
// Substitution cost when both letters map to the same `visualLetterMap` group.
const mapSubCost = 10;
const setOfSeparators = new Set([walker_1.JOIN_SEPARATOR, walker_1.WORD_SEPARATOR]);
// Default-locale collator; the final tie-breaker in compSuggestionResults.
const collator = new Intl.Collator();
// Matches either separator; used to split a suggestion into its component words.
const regexSeparator = new RegExp(regexQuote(walker_1.JOIN_SEPARATOR) + '|' + regexQuote(walker_1.WORD_SEPARATOR), 'g');
// Extra cost indexed by the length of a component word: lengths 1 and 2 are penalised (50 and 25);
// lengths beyond the table fall back to 0 at the point of use.
const wordLengthCost = [0, 50, 25, 0];
/**
 * Collect the best suggestions for `word` into a sorted array.
 *
 * @param root - passed through to `genSuggestions`.
 * @param word - the word to find suggestions for.
 * @param maxNumSuggestions - maximum number of suggestions to keep.
 * @param compoundMethod - passed through to `genSuggestions`.
 * @param numChanges - change limit handed to the collector (scales its cost ceiling).
 * @returns the collector's `suggestions` (sorted by `compSuggestionResults`).
 */
function suggest(root, word, maxNumSuggestions = defaultMaxNumberSuggestions, compoundMethod = walker_1.CompoundWordsMethod.NONE, numChanges = maxNumChanges) {
    // No filter is supplied, so the collector falls back to its accept-all default.
    const results = suggestionCollector(word, maxNumSuggestions, undefined, numChanges);
    const candidates = genSuggestions(root, word, compoundMethod);
    results.collect(candidates);
    return results.suggestions;
}
exports.suggest = suggest;
/**
 * Generator of suggestions for `word`; a thin wrapper around `genCompoundableSuggestions`.
 *
 * @param root - passed through unchanged.
 * @param word - the word to find suggestions for.
 * @param compoundMethod - defaults to `CompoundWordsMethod.NONE`.
 */
function* genSuggestions(root, word, compoundMethod = walker_1.CompoundWordsMethod.NONE) {
// `yield*` (rather than a re-yielding loop) is required: it forwards the value the caller
// passes to `next()` into the inner generator, which uses it to update its cost limit.
yield* genCompoundableSuggestions(root, word, compoundMethod);
}
exports.genSuggestions = genSuggestions;
/**
 * Generator that walks nodes supplied by `walker_1.hintedWalker` and yields
 * `{ word, cost }` suggestions for `word`.
 *
 * For every node visited at `depth`, one row of an edit-cost matrix is filled
 * (`matrix[depth + 1]`). Columns index the characters of `' ' + word`; only the
 * column window `[a, b]` stored in `stack[depth]` is computed for each row.
 *
 * Two-way protocol: after each `yield`, the value the consumer passes to
 * `next()` replaces `costLimit` when it is truthy; otherwise the limit is kept.
 *
 * @param root - handed to `walker_1.hintedWalker`.
 * @param word - the word to find suggestions for.
 * @param compoundMethod - handed to `walker_1.hintedWalker`.
 */
function* genCompoundableSuggestions(root, word, compoundMethod) {
// Suggestions yielded from the main path, in emission order; replayed for repeated separator states.
const history = [];
// tag -> { w: text at the separator, i: index into `history` at that moment, m: row minimum }.
const historyTags = new Map();
const bc = baseCost;
const psc = postSwapCost;
const matrix = [[]];
const stack = [];
// Leading pad so column 0 stands for "no characters of word consumed".
const x = ' ' + word;
const mx = x.length - 1;
const specialCosts = {
[walker_1.WORD_SEPARATOR]: insertSpaceCost,
[walker_1.JOIN_SEPARATOR]: insertSpaceCost,
};
// Ceiling on acceptable cost: half the word's length in base-cost units, capped at maxNumChanges units.
let costLimit = Math.min((bc * word.length) / 2, bc * maxNumChanges);
const a = 0;
let b = 0;
// Row 0: column i costs i * baseCost. The loop test sees the previous cell's cost,
// so the last cell written may itself exceed costLimit; b ends on the last column written.
for (let i = 0, c = 0; i <= mx && c <= costLimit; ++i) {
c = i * baseCost;
matrix[0][i] = c;
b = i;
}
stack[0] = { a, b };
let hint = word.slice(a);
// NOTE: this `i` is the walker iterator; it is shadowed by other `i` bindings inside the loop body.
const i = walker_1.hintedWalker(root, compoundMethod, hint);
let goDeeper = true;
// Each step tells the walker (via next) whether to descend below the node just processed.
for (let r = i.next({ goDeeper }); !r.done; r = i.next({ goDeeper })) {
const { text, node, depth } = r.value;
// Column window inherited from the parent row (shadows the outer a / b).
let { a, b } = stack[depth];
// w: the letter just added to the suggestion text; wG: its visual-letter group.
// The fallbacks here (-1) and for the word letter (-2) differ, so two unmapped letters never share a group.
const w = text.slice(-1);
const wG = orthography_1.visualLetterMap.get(w) || -1;
if (setOfSeparators.has(w)) {
// At a separator, fingerprint the parent row: window start plus each cell's offset from the row minimum.
const mxRange = matrix[depth].slice(a, b + 1);
const mxMin = Math.min(...mxRange);
const tag = [a].concat(mxRange.map((c) => c - mxMin)).join();
if (historyTags.has(tag) && historyTags.get(tag).m <= mxMin) {
// Same fingerprint seen before at an equal or lower minimum: do not descend;
// instead replay the suggestions recorded after that earlier separator.
goDeeper = false;
const { i, w, m } = historyTags.get(tag);
if (i >= history.length) {
continue;
}
const r = history[i];
if (r.word.slice(0, w.length) !== w) {
continue;
}
// Cost offset between this visit and the recorded one.
const dc = mxMin - m;
// Entries starting with the recorded prefix `w` are contiguous from index i; stop at the first that is not.
for (let p = i; p < history.length; ++p) {
const { word, cost: hCost } = history[p];
const fix = word.slice(0, w.length);
if (fix !== w) {
break;
}
const cost = hCost + dc;
if (cost <= costLimit) {
// Swap the recorded prefix for the current text, keeping the recorded suffix.
// Replayed results are yielded but not appended to `history`.
const suffix = word.slice(w.length);
const emit = text + suffix;
costLimit = (yield { word: emit, cost }) || costLimit;
}
}
continue;
}
else {
// First time (or cheaper than before): remember where this prefix's results will start in `history`.
historyTags.set(tag, { w: text, i: history.length, m: mxMin });
}
}
// d: matrix row for this node (row 0 is the empty suggestion).
const d = depth + 1;
// Suggestion letter before `w`, used to detect transposed pairs.
const lastSugLetter = d > 1 ? text[d - 2] : '';
// c: cost of a single edit at this depth; ci: insert cost, adjusted for separators.
const c = bc - d;
const ci = c + (specialCosts[w] || 0);
// Set up the first column of the window: only an insert from the row above is possible.
matrix[d] = matrix[d] || [];
matrix[d][a] = matrix[d - 1][a] + ci + d - a;
let lastLetter = x[a];
// min tracks the cheapest cell in this row; it decides whether descending is worthwhile.
let min = matrix[d][a];
let i;
// Fill the interior of the window.
for (i = a + 1; i <= b; ++i) {
const curLetter = x[i];
const cG = orthography_1.visualLetterMap.get(curLetter) || -2;
// Substitution cost: 0 for a match, mapSubCost for the same visual group,
// psc when (w, curLetter) mirrors (lastLetter, lastSugLetter) i.e. a swapped pair, otherwise c.
const subCost = w === curLetter
? 0
: wG === cG
? mapSubCost
: curLetter === lastSugLetter
? w === lastLetter
? psc
: c
: c;
const e = Math.min(matrix[d - 1][i - 1] + subCost, // substitute
matrix[d - 1][i] + ci, // insert
matrix[d][i - 1] + c // delete
);
min = Math.min(min, e);
matrix[d][i] = e;
lastLetter = curLetter;
}
// Try to extend the window one column to the right; the insert candidate is not considered for this column.
b += 1;
if (b <= mx) {
i = b;
const curLetter = x[i];
const cG = orthography_1.visualLetterMap.get(curLetter) || -2;
const subCost = w === curLetter
? 0
: wG === cG
? mapSubCost
: curLetter === lastSugLetter
? w === lastLetter
? psc
: c
: c;
const e = Math.min(matrix[d - 1][i - 1] + subCost, // substitute
matrix[d][i - 1] + c // delete
);
min = Math.min(min, e);
matrix[d][i] = e;
lastLetter = curLetter;
}
else {
// Already at the last column; undo the extension.
b -= 1;
}
// Shrink the window from both ends past cells that exceed the cost limit.
for (; b > a && matrix[d][b] > costLimit; b -= 1) {
/* empty */
}
for (; a < b && matrix[d][a] > costLimit; a += 1) {
/* empty */
}
// Leave one column of slack on the right (bounded by mx) for the child rows.
b = Math.min(b + 1, mx);
stack[d] = { a, b };
// Cost read at the right edge of the window.
const cost = matrix[d][b];
if (node.f && util_1.isWordTerminationNode(node) && cost <= costLimit) {
const r = { word: text, cost };
history.push(r);
costLimit = (yield r) || costLimit;
}
// Descend only if some cell in this row is still within the limit.
goDeeper = min <= costLimit;
// NOTE(review): `hint` is reassigned here but not read again in this function after the hintedWalker call.
hint = word.slice(a, b);
}
// console.log(`tag size: ${historyTags.size}, history size: ${history.length}`);
// console.log(history.map((r, i) => `${i} ${r.cost} ${r.word}`).join('\n'));
}
exports.genCompoundableSuggestions = genCompoundableSuggestions;
/**
 * Ordering for suggestion results: lower cost first, then shorter word,
 * then collator order of the words.
 *
 * @param a - first suggestion (`{ word, cost }`).
 * @param b - second suggestion (`{ word, cost }`).
 * @returns negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compSuggestionResults(a, b) {
    const costDelta = a.cost - b.cost;
    if (costDelta) {
        return costDelta;
    }
    const lengthDelta = a.word.length - b.word.length;
    if (lengthDelta) {
        return lengthDelta;
    }
    return collator.compare(a.word, b.word);
}
exports.compSuggestionResults = compSuggestionResults;
/**
 * Create a collector that keeps at most `maxNumSuggestions` of the cheapest
 * suggestions offered to it, de-duplicated by word.
 *
 * The running cost ceiling (`maxCost`) starts at
 * `min(baseCost * wordToMatch.length / 2, baseCost * changeLimit)` and is lowered
 * whenever the collector overflows and evicts its most expensive entry.
 *
 * @param wordToMatch - the word suggestions are being collected for.
 * @param maxNumSuggestions - maximum number of suggestions to retain.
 * @param filter - predicate on the suggested word; rejected words are ignored.
 * @param changeLimit - scales the initial cost ceiling.
 * @returns an object exposing `collect`, `add`, and read-only `suggestions`,
 *          `maxCost`, `word` and `maxNumSuggestions`.
 */
function suggestionCollector(wordToMatch, maxNumSuggestions, filter = () => true, changeLimit = maxNumChanges) {
    const byWord = new Map();
    let maxCost = Math.min((baseCost * wordToMatch.length) / 2, baseCost * changeLimit);

    // Snapshot of the retained suggestions, best first.
    function ranked() {
        return [...byWord.values()].sort(compSuggestionResults);
    }

    // Evict the worst suggestion and tighten the ceiling to the next-worst cost.
    function dropMax() {
        if (byWord.size < 2) {
            byWord.clear();
            return;
        }
        const ordered = ranked();
        const worst = ordered[ordered.length - 1];
        const nextWorst = ordered[ordered.length - 2];
        byWord.delete(worst.word);
        maxCost = nextWorst.cost;
    }

    // Add the per-component length penalty to a suggestion's cost.
    function adjustCost(sug) {
        let extraCost = 0;
        for (const part of sug.word.split(regexSeparator)) {
            extraCost += wordLengthCost[part.length] || 0;
        }
        return { word: sug.word, cost: sug.cost + extraCost };
    }

    // Offer one suggestion; always reports the current ceiling back.
    function collector(suggestion) {
        const { word, cost } = adjustCost(suggestion);
        if (!(cost <= maxCost) || !filter(suggestion.word)) {
            return maxCost;
        }
        const known = byWord.get(word);
        if (known !== undefined) {
            // Already present: keep the cheaper of the two costs.
            known.cost = Math.min(known.cost, cost);
            return maxCost;
        }
        byWord.set(word, { word, cost });
        if (byWord.size > maxNumSuggestions) {
            dropMax();
        }
        return maxCost;
    }

    // Drain a suggestion generator, feeding the current ceiling back on every step.
    function collect(src) {
        for (let step = src.next(maxCost); !step.done; step = src.next(maxCost)) {
            if (step.value !== undefined) {
                collector(step.value);
            }
        }
    }

    return {
        collect,
        add: function (suggestion) {
            collector(suggestion);
            return this;
        },
        get suggestions() {
            return ranked();
        },
        get maxCost() {
            return maxCost;
        },
        get word() {
            return wordToMatch;
        },
        get maxNumSuggestions() {
            return maxNumSuggestions;
        },
    };
}
exports.suggestionCollector = suggestionCollector;
/**
 * Escape regular-expression metacharacters so `text` matches itself literally
 * when embedded in a (non-unicode-mode) `RegExp` source string.
 *
 * @param text verbatim text to be inserted into a regexp
 * @returns text that can be used in a regexp.
 */
function regexQuote(text) {
    // `$&` inserts the matched character. The pattern has no capture group, so `$1`
    // would be emitted literally (turning '+' into '\$1') instead of escaping the match.
    return text.replace(/[[\]\-+(){},|*.\\^$?]/g, '\\$&');
}
//# sourceMappingURL=suggest.js.map