You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
2.8 KiB
89 lines
2.8 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.convertToTrieRefNodes = void 0; |
|
const TrieNode_1 = require("./TrieNode"); |
|
const gensequence_1 = require("gensequence"); |
|
// Minimum tally a node needs for convertToTrieRefNodes to emit it (and its
// descendants) ahead of the final walk that starts from the root.
const MinReferenceCount = 3; |
|
/**
 * Walks a trie and emits its nodes as TrieRefNodes, in which each child is
 * referenced by the position of an already emitted node instead of by object.
 *
 * Emission order:
 *  1. the shared end-of-word node (always position 0);
 *  2. nodes tallied at least `MinReferenceCount` times, highest tally first,
 *     each preceded by any of its descendants that were not emitted yet;
 *  3. whatever is still missing, reached by walking from `root`.
 *
 * The tallies are computed eagerly when this function is called; the nodes
 * themselves are produced lazily by the returned generator.
 *
 * @param root Root of the Trie -- a DAWG is preferred to keep the number of duplicates down.
 * @returns A generator yielding `{ f }`, `{ r }` or `{ f, r }` objects, where
 *          `r` is a list of `[key, position]` pairs sorted by key.
 */
function convertToTrieRefNodes(root) {
    // Stand-in for every childless flagged node; they all share its position.
    const eow = { f: TrieNode_1.FLAG_WORD, c: undefined };
    // node -> number of times the node was reached during the counting pass.
    const tallies = new Map([[eow, 0]]);
    // node -> its own tally plus the rolled-up tallies of all its children.
    const rollupTally = new Map();
    // node -> position assigned to the node in the emitted sequence.
    const cached = new Map();
    let count = 0;

    // True for a flagged node without a child map.
    const isWordLeaf = (node) => Boolean(node.f && !node.c);

    // Snapshot of a node's children; empty when the node has no child map.
    const childNodes = (node) => {
        const kids = node.c;
        return kids == null ? [] : [...kids.values()];
    };

    // Counting pass: records how often each node is reached. A node's children
    // are only descended into the first time the node is seen.
    function countReferences(node) {
        if (isWordLeaf(node)) {
            tallies.set(eow, (tallies.get(eow) || 0) + 1);
            return;
        }
        const seen = tallies.get(node);
        tallies.set(node, (seen || 0) + 1);
        if (!seen) {
            childNodes(node).forEach((child) => countReferences(child));
        }
    }

    // Rollup pass: memoised sum of a node's tally and its children's rollups.
    // Nodes without a child map take the end-of-word tally.
    function accumulate(node) {
        const known = rollupTally.get(node);
        if (known) {
            return known;
        }
        let total;
        if (node.c) {
            total = tallies.get(node);
            for (const child of node.c.values()) {
                total += accumulate(child);
            }
        }
        else {
            total = tallies.get(eow);
        }
        rollupTally.set(node, total);
        return total;
    }

    // Builds the emitted form of a node: child links become [key, position]
    // pairs ordered by key; `f` is kept only when it is truthy or when there
    // is no child map at all.
    function toRefNode(node) {
        const flags = node.f;
        const kids = node.c;
        if (!kids) {
            return { f: flags };
        }
        const r = Array.from(kids)
            .sort(([keyA], [keyB]) => (keyA < keyB ? -1 : 1))
            .map(([key, child]) => [key, cached.get(child)]);
        return flags ? { f: flags, r } : { r };
    }

    // Emits the not-yet-emitted part of a subtree, children before parent,
    // visiting the children with the largest rollup first.
    function* emitSubtree(node) {
        if (cached.has(node)) {
            return;
        }
        if (isWordLeaf(node)) {
            // Childless flagged nodes are never emitted; they alias the shared node.
            cached.set(node, cached.get(eow));
            return;
        }
        const byWeight = childNodes(node).sort((a, b) => rollupTally.get(b) - rollupTally.get(a));
        for (const child of byWeight) {
            yield* emitSubtree(child);
        }
        cached.set(node, count);
        count += 1;
        yield toRefNode(node);
    }

    // Emits the subtrees of all nodes whose tally reaches MinReferenceCount,
    // starting with the highest tally.
    function* emitFrequent() {
        const frequent = gensequence_1.genSequence(tallies).filter(([, refs]) => refs >= MinReferenceCount);
        const ordered = [...frequent].sort(([, refsA], [, refsB]) => refsB - refsA);
        for (const [node] of ordered) {
            yield* emitSubtree(node);
        }
    }

    function* emitAll() {
        // The shared end-of-word node always takes the first position.
        cached.set(eow, count);
        count += 1;
        yield toRefNode(eow);
        // Emit the highest referenced nodes first.
        yield* emitFrequent();
        // Emit the rest.
        yield* emitSubtree(root);
    }

    countReferences(root);
    accumulate(root);
    return emitAll();
}
|
exports.convertToTrieRefNodes = convertToTrieRefNodes; |
|
//# sourceMappingURL=convertToTrieRefNodes.js.map
|