You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
5.7 KiB
174 lines
5.7 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.importTrie = exports.serializeTrie = exports.DATA = void 0; |
|
const TrieNode_1 = require("./TrieNode"); |
|
const gensequence_1 = require("gensequence"); |
|
const EOW = '*'; |
|
exports.DATA = '__DATA__'; |
|
/**
 * Lazily enumerate every leaf (a node without a `c` child map) reachable from `node`,
 * depth-first, in child-map iteration order.
 *
 * Side effect: each visited node is tagged with `s`, the key it was reached by,
 * unless it already carries a non-null `s`. The starting node is tagged with ''.
 *
 * @param node - the node to start walking from.
 * @returns a gensequence sequence of `{ n, p }` pairs, where `n` is the leaf and `p` is
 *          its parent (`undefined` when the starting node itself is a leaf).
 */
function leaves(node) {
    function* visit(current, key, parent) {
        // Keep an existing label; otherwise label the node with the key used to reach it.
        current.s = current.s !== null && current.s !== undefined ? current.s : key;
        if (current.c) {
            for (const [childKey, child] of current.c) {
                yield* visit(child, childKey, current);
            }
            return;
        }
        yield { n: current, p: parent };
    }
    return gensequence_1.genSequence(visit(node, ''));
}
|
/**
 * Flatten a trie into a sequence of reference nodes, leaves first and the root last.
 *
 * The trie is peeled one layer of leaves per pass. Each leaf is emitted (once per distinct
 * signature within a pass) and given the next running index. It is then removed from its
 * parent's child map `c`, and its index is appended to the parent's reference list `r`.
 * A parent whose child map becomes empty loses `c` and is therefore a leaf on the next pass.
 * Emission order defines the indices, so a reader can resolve references by row position.
 *
 * Passes continue for as long as the previous pass detached at least one child and the
 * root still has children. There is deliberately no fixed pass limit: a limit would
 * silently drop everything still attached to the root for very deep tries. Every
 * continuing pass removes at least one child entry, so the loop always terminates.
 *
 * Note: this is destructive; `node` and its descendants are mutated.
 *
 * @param node - root of the trie to flatten.
 * @returns a gensequence sequence of nodes in emission order, ending with `node`.
 */
function flattenToReferences(node) {
    function* walk() {
        let nextIndex = 0;
        let detached = 0;
        do {
            detached = 0;
            // Signatures are only compared within a single pass.
            const signatureMap = new Map();
            for (const leaf of leaves(node)) {
                const sig = signature(leaf.n);
                let ref = signatureMap.get(sig);
                if (ref === undefined) {
                    // First leaf with this signature in this pass: emit it and assign its index.
                    yield leaf.n;
                    ref = nextIndex;
                    signatureMap.set(sig, ref);
                    nextIndex += 1;
                }
                // Fix up the parent
                /* istanbul ignore else */
                if (leaf.p && leaf.p.c) {
                    const parent = leaf.p;
                    parent.r = parent.r || [];
                    parent.r.push(ref);
                    // Only a successful removal counts as progress; this keeps the loop finite
                    // even if a leaf's label does not match the key it is stored under.
                    if (parent.c.delete(leaf.n.s)) {
                        detached += 1;
                    }
                    if (!parent.c.size) {
                        delete parent.c;
                    }
                }
            }
        } while (detached > 0 && node.c);
        yield node;
    }
    return gensequence_1.genSequence(walk());
}
|
/**
 * Build the string used to recognise equivalent nodes: the node's letter `s`, followed by
 * the end-of-word marker when `f` is truthy, followed by its references in ascending
 * order joined with commas.
 *
 * Side effect: `node.r`, when present, is sorted in place.
 *
 * @param node - a reference node (`s`, optional `f`, optional `r`).
 * @returns the signature string.
 */
function signature(node) {
    const wordMark = node.f ? EOW : '';
    let refList = '';
    if (node.r) {
        node.r.sort((x, y) => x - y);
        refList = node.r.join(',');
    }
    return node.s + wordMark + refList;
}
|
/**
 * Render one node as a data row: the letter `s`, the end-of-word marker when `f` is
 * truthy, then the references sorted ascending, each written in the given base and
 * separated by commas.
 *
 * Side effect: `node.r`, when present, is sorted in place.
 *
 * @param node - a reference node (`s`, optional `f`, optional `r`).
 * @param base - radix passed to `Number.prototype.toString` for each reference.
 * @returns the row text, without a line terminator.
 */
function toLine(node, base) {
    const wordMark = node.f ? EOW : '';
    let encodedRefs = '';
    if (node.r) {
        node.r.sort((x, y) => x - y);
        encodedRefs = node.r.map((ref) => ref.toString(base)).join(',');
    }
    return node.s + wordMark + encodedRefs;
}
|
/**
 * Produce the header rows of a serialized trie: the reader shebang, the format name,
 * the `base=` row, one `# `-prefixed row per line of `comment` (none when `comment` is
 * empty), the `# Data:` marker and finally the `DATA` sentinel row.
 *
 * @param base - value written after `base=`.
 * @param comment - free text; split on '\n' into comment rows.
 * @returns a gensequence sequence of header rows without line terminators.
 */
function generateHeader(base, comment) {
    const commentRows = comment ? comment.split('\n').map((text) => `# ${text}`) : [];
    const rows = [
        '#!/usr/bin/env cspell-trie reader',
        'TrieXv2',
        `base=${base}`,
        ...commentRows,
        '# Data:',
        exports.DATA,
    ];
    return gensequence_1.genSequence(rows);
}
|
/**
 * Serialize a TrieNode.
 * Note: This is destructive.  The node will no longer be usable.
 * Even though it is possible to preserve the trie, dealing with very large tries can consume a lot of memory.
 * Considering this is the last step before exporting, it was decided to let this be destructive.
 *
 * @param root - root node of the trie to serialize.
 * @param options - either the numeric base, or an object `{ base, comment }`.
 *                  `base` defaults to 16 and is clamped to the range 10..36;
 *                  `comment` defaults to '' and is written into the header.
 * @returns a sequence of lines, each terminated with '\n': the header followed by one
 *          row per flattened node.
 */
function serializeTrie(root, options = 16) {
    const settings = typeof options === 'number' ? { base: options } : options;
    const { base = 16, comment = '' } = settings;
    const radix = base > 36 ? 36 : base < 10 ? 10 : base;
    const rootRef = { ...root, s: '^' };
    // Encode references with the clamped radix, i.e. the same value the header advertises.
    // Using the raw `base` here would desynchronise rows and header for base < 10 and
    // make Number.prototype.toString throw a RangeError for base > 36.
    const rows = flattenToReferences(rootRef).map((n) => toLine(n, radix));
    return generateHeader(radix, comment)
        .concat(rows)
        .map((a) => a + '\n');
}
|
exports.serializeTrie = serializeTrie; |
|
/**
 * Wrap any iterable in a generator, so the result is both an iterator (has `next()`)
 * and an iterable (can be handed to `for...of` after being partially consumed).
 *
 * @param iter - the iterable to wrap.
 * @returns a generator that yields the items of `iter` in order.
 */
function* toIterableIterator(iter) {
    // Delegate completely to the wrapped iterable.
    yield* iter;
}
|
/**
 * Rebuild a trie from the lines of a serialized "TrieXv2" file.
 *
 * Header: blank lines and lines starting with '#' are skipped until the `DATA` sentinel
 * row. The first two remaining rows must be `TrieXv2` and `base=<digits>`; the digits
 * give the radix used to decode references.
 *
 * Data: each non-empty row is `<letter>[*]<ref>,<ref>,...`, where `*` marks a word end
 * and each ref is the zero-based position of an earlier data row. The node decoded from
 * the last data row is the root.
 *
 * @param linesX - iterable of text lines (with or without trailing line terminators).
 * @returns the root node, or `{ s: '' }` when there are no data rows.
 * @throws {Error} 'Unknown file format' when the header is not recognised.
 * @throws {Error} when a data row references a node that has not been defined.
 */
function importTrie(linesX) {
    let radix = 16;
    const comment = /^\s*#/;
    const iter = toIterableIterator(linesX);

    /** Validate the two significant header rows and extract the radix. */
    function parseHeaderRows(headerRows) {
        const header = headerRows.slice(0, 2).join('\n');
        const headerReg = /^TrieXv2\nbase=(\d+)$/;
        /* istanbul ignore if */
        if (!headerReg.test(header)) {
            throw new Error('Unknown file format');
        }
        radix = Number.parseInt(header.replace(headerReg, '$1'), 10);
    }

    /** Consume rows up to and including the DATA sentinel, then parse what was collected. */
    function readHeader(iter) {
        const headerRows = [];
        // Pull rows manually: leaving a `for...of` early would close the generator and
        // make the data rows that follow unreachable.
        for (let next = iter.next(); !next.done; next = iter.next()) {
            const line = next.value.trim();
            if (!line || comment.test(line)) {
                continue;
            }
            if (line === exports.DATA) {
                break;
            }
            headerRows.push(line);
        }
        parseHeaderRows(headerRows);
    }

    /** Split a data row into its letter, word flag and numeric references. */
    function parseLine(line, base) {
        const isWord = line[1] === EOW;
        const refOffset = isWord ? 2 : 1;
        const refs = line
            .slice(refOffset)
            .split(',')
            .filter((a) => !!a)
            .map((r) => Number.parseInt(r, base));
        return {
            letter: line[0],
            isWord,
            refs,
        };
    }

    const flagsWord = { f: TrieNode_1.FLAG_WORD };

    /** Turn a data row into a node whose children are the already-decoded nodes it references. */
    function decodeLine(line, nodes) {
        const { letter, isWord, refs } = parseLine(line, radix);
        const flags = isWord ? flagsWord : {};
        const children = refs
            .map((r) => {
                const child = nodes[r];
                if (child === undefined) {
                    // Covers forward/out-of-range indices and text that does not parse in `radix`.
                    throw new Error(`Invalid node reference in data line: ${JSON.stringify(line)}`);
                }
                return child;
            })
            .sort((a, b) => (a.s < b.s ? -1 : 1))
            .map((n) => [n.s, n]);
        const cNode = children.length ? { c: new TrieNode_1.ChildMap(children) } : {};
        return { s: letter, ...cNode, ...flags };
    }

    readHeader(iter);

    const nodes = [];
    let root = { s: '' };
    for (const rawLine of iter) {
        const line = rawLine.replace(/\r?\n/, '');
        if (!line) {
            continue;
        }
        root = decodeLine(line, nodes);
        nodes.push(root);
    }
    return root;
}
|
exports.importTrie = importTrie; |
|
//# sourceMappingURL=importExportV2.js.map
|