You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
233 lines
7.6 KiB
233 lines
7.6 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.importTrie = exports.serializeTrie = exports.DATA = void 0; |
|
const TrieNode_1 = require("./TrieNode"); |
|
const gensequence_1 = require("gensequence"); |
|
const bufferLines_1 = require("./bufferLines"); |
|
// Control tokens of the TrieXv3 text format.
const EOW = '$'; // marks the current node as the end of a word
const BACK = '<'; // steps back up towards the root
const EOL = '\n';
const LF = '\r'; // NOTE: despite the name, this is the carriage-return character
const REF = '#'; // opens a reference to a previously emitted node
const EOR = ';'; // closes a reference
const ESCAPE = '\\';

/**
 * Characters that must be prefixed with ESCAPE when they appear as trie keys:
 * the control tokens, the decimal digits, and ASCII punctuation.
 */
const specialCharacters = new Set([
    EOW,
    BACK,
    EOL,
    REF,
    EOR,
    ESCAPE,
    LF,
    ...'0123456789',
    ...'`~!@#$%^&*()_-+=[]{};:\'"<>,./?\\|',
]);

/** Raw character -> printable two-character escape body (written after ESCAPE). */
const specialCharacterMap = new Map([
    ['\n', '\\n'],
    ['\r', '\\r'],
    ['\\', '\\\\'],
]);

/** Inverse of specialCharacterMap: escape body -> raw character. */
const characterMap = new Map(Array.from(specialCharacterMap, ([raw, escaped]) => [escaped, raw]));
|
/** Marker line that ends the header; the serialized trie follows it. */
exports.DATA = '__DATA__';

/**
 * Build the file header as a sequence of newline-terminated lines.
 *
 * The header consists of a shebang-style first line, the format name
 * (`TrieXv3`), the `base=` line, the optional comment (one `# ` prefixed line
 * per `\n`-separated line of `comment`), a `# Data:` line and the DATA marker.
 *
 * @param base - radix written on the `base=` line.
 * @param comment - free text; skipped entirely when falsy.
 * @returns the sequence produced by `genSequence(...).map(...)`, one string per header line.
 */
function generateHeader(base, comment) {
    const commentLines = comment ? comment.split('\n').map((text) => `# ${text}`) : [];
    const lines = [
        '#!/usr/bin/env cspell-trie reader',
        'TrieXv3',
        `base=${base}`,
        ...commentLines,
        '# Data:',
        exports.DATA,
    ];
    return gensequence_1.genSequence(lines).map((line) => `${line}\n`);
}
|
/**
 * Serialize a TrieNode.
 *
 * The trie is walked depth first. Each child key is written (escaped when it
 * is a special character), followed by its subtree and then a BACK token.
 * A node that has children and was already written is replaced by a
 * `#<index>;` reference, where the index is written in `radix`. Consecutive
 * BACK tokens are run-length encoded as `<token><count>` with a count of at
 * most 9 per group.
 *
 * @param root - the root node; nodes are read through `c` (a Map of children)
 *   and `f` (truthy when the node ends a word).
 * @param options - either the numeric base, or an object `{ base, comment }`.
 *   The base is clamped to the range 10..36.
 * @returns the header sequence concatenated with the buffered body.
 */
function serializeTrie(root, options = 16) {
    const settings = typeof options === 'number' ? { base: options } : options;
    const { base = 16, comment = '' } = settings;
    let radix = base;
    if (base > 36) {
        radix = 36;
    } else if (base < 10) {
        radix = 10;
    }
    // node -> reference index, assigned in the order nodes with children are first visited.
    const refIds = new Map();
    let nextRefId = 0;
    // Token waiting for its run of BACK steps to be counted before being written.
    const pending = { last: '', count: 0 };

    /** Format a reference to the node with index `n`. */
    function ref(n) {
        return `#${n.toString(radix)};`;
    }

    /** Prefix special characters with ESCAPE, using the printable form when one exists. */
    function escape(s) {
        if (!specialCharacters.has(s)) {
            return s;
        }
        return ESCAPE + (specialCharacterMap.get(s) || s);
    }

    /** Write out the pending token together with its counted BACK steps. */
    function* flush() {
        while (pending.count) {
            const run = Math.min(9, pending.count);
            yield run > 1 ? `${pending.last}${run}` : pending.last;
            // Anything beyond the first group of 9 is written as plain BACK runs.
            pending.last = BACK;
            pending.count -= run;
        }
    }

    /** Route one token through the BACK run-length buffer. */
    function* emit(token) {
        if (token === BACK) {
            pending.count++;
            return;
        }
        yield* flush();
        if (token === EOW) {
            // Held back: it is only written once at least one BACK follows it.
            pending.last = EOW;
            pending.count = 0;
            return;
        }
        yield token;
    }

    /** Depth-first walk that produces the token stream for `node`. */
    function* walk(node, depth) {
        const knownId = refIds.get(node);
        if (knownId !== undefined) {
            yield* emit(ref(knownId));
            return;
        }
        if (node.c) {
            refIds.set(node, nextRefId++);
            const children = Array.from(node.c).sort(([left], [right]) => (left < right ? -1 : 1));
            for (const [key, child] of children) {
                yield* emit(escape(key));
                yield* walk(child, depth + 1);
                yield* emit(BACK);
                // One line break after each top-level branch.
                if (depth === 0) {
                    yield* emit(EOL);
                }
            }
        }
        // Output EOW after children so it can be optimized on read
        if (node.f) {
            yield* emit(EOW);
        }
    }

    /** Whole-trie token stream, including any BACK steps still buffered at the end. */
    function* serialize(node) {
        yield* walk(node, 0);
        yield* flush();
    }

    const header = generateHeader(radix, comment);
    // NOTE(review): bufferLines lives in ./bufferLines; presumably it groups the
    // stream into larger chunks (here 120 with '\n', then 10 with '') — confirm there.
    const lines = bufferLines_1.bufferLines(serialize(root), 120, '\n');
    const chunks = bufferLines_1.bufferLines(lines, 10, '');
    return header.concat(chunks);
}
exports.serializeTrie = serializeTrie;
|
/**
 * Wrap any iterable in a generator so it can be consumed step by step with
 * `next()` and then handed on as an iterable for the remaining items.
 *
 * @param iter - the iterable to wrap.
 */
function* toIterableIterator(iter) {
    for (const item of iter) {
        yield item;
    }
}
|
/**
 * Read a trie from the lines of a TrieXv3 file.
 *
 * The header is consumed first: blank lines and lines starting with `#` are
 * skipped, and reading stops at the DATA marker. The first two remaining rows
 * must be `TrieXv3` and `base=<digits>`. Everything after the marker is split
 * into single characters and folded through the `parseStream` state machine.
 *
 * @param linesX - iterable of text lines.
 * @returns the root node of the reconstructed trie.
 * @throws Error('Unknown file format') when the header rows do not match.
 */
function importTrie(linesX) {
    const commentLine = /^\s*#/;
    const headerFormat = /^TrieXv3\nbase=(\d+)$/;
    // A single shared iterator: the header reader takes the first lines and the
    // data reader continues from wherever it stopped.
    const iter = toIterableIterator(linesX);

    /** Collect the meaningful header rows up to (not including) the DATA marker. */
    function collectHeaderRows(source) {
        const rows = [];
        // Deliberately driven with next(): leaving a for...of loop early would
        // close the generator and the data section could no longer be read.
        for (let step = source.next(); !step.done; step = source.next()) {
            const line = step.value.trim().replace(/\r|\n/g, '');
            if (!line || commentLine.test(line)) {
                continue;
            }
            if (line === exports.DATA) {
                break;
            }
            rows.push(line);
        }
        return rows;
    }

    /** Validate the first two header rows and return the declared radix. */
    function radixFromHeader(rows) {
        const header = rows.slice(0, 2).join('\n');
        const match = headerFormat.exec(header);
        /* istanbul ignore if */
        if (!match) {
            throw new Error('Unknown file format');
        }
        return Number.parseInt(match[1], 10);
    }

    const radix = radixFromHeader(collectHeaderRows(iter));
    const root = {};
    const initialState = { nodes: [root], root, stack: [{ node: root, s: '' }], parser: undefined };
    const finalState = gensequence_1
        .genSequence(iter)
        .concatMap((line) => line.split(''))
        .reduce(parseStream(radix), initialState);
    return finalState.root;
}
exports.importTrie = importTrie;
|
/**
 * Create the reducer that rebuilds a trie from a stream of single characters.
 *
 * The reducer state is `{ root, nodes, stack, parser }`:
 *  - `nodes`: nodes that may be the target of a reference, in creation order;
 *  - `stack`: path from the root to the current node, as `{ node, s }` entries
 *    where `s` is the key under which `node` hangs from its parent;
 *  - `parser`: handler that takes the next character, or undefined to
 *    dispatch on the character itself.
 *
 * @param radix - base used to parse reference indexes.
 * @returns a `(state, character) => state` function.
 */
function parseStream(radix) {
    // Shared, frozen node used for every childless end-of-word node.
    const eow = Object.freeze({ f: 1 });

    /** REF seen: collect characters up to EOR, then link the referenced node. */
    function parseReference(state, _) {
        let digits = '';
        const readDigits = (current, ch) => {
            if (ch !== EOR) {
                digits = digits + ch;
                return current;
            }
            const { root, nodes, stack } = current;
            const target = nodes[parseInt(digits, radix)];
            const key = stack[stack.length - 1].s;
            const parent = stack[stack.length - 2].node;
            // Replace the node created for the preceding character with the referenced one.
            parent.c.set(key, target);
            return { root, nodes, stack, parser: undefined };
        };
        // Drop the most recently registered node from the reference table.
        state.nodes.pop();
        return { ...state, parser: readDigits };
    }

    /** ESCAPE seen: take the next one or two characters as a literal key. */
    function parseEscapeCharacter(state, _) {
        let pendingEscape = '';
        const readEscaped = (current, ch) => {
            if (!pendingEscape && ch === ESCAPE) {
                // A second ESCAPE: the next character selects an entry of characterMap.
                pendingEscape = ch;
                return current;
            }
            const literal = pendingEscape ? characterMap.get(pendingEscape + ch) || ch : ch;
            return parseCharacter({ ...current, parser: undefined }, literal);
        };
        return { ...state, parser: readEscaped };
    }

    /** Add a child under the current node for key `s` and descend into it. */
    function parseCharacter(state, s) {
        const { root, nodes, stack } = state;
        const current = stack[stack.length - 1].node;
        if (current.c == null) {
            current.c = new Map();
        }
        const child = { f: undefined, c: undefined, n: nodes.length };
        current.c.set(s, child);
        stack.push({ node: child, s });
        nodes.push(child);
        return { root, nodes, stack, parser: undefined };
    }

    /** EOW seen: flag the current node as a word and step back one level. */
    function parseEOW(state, _) {
        const { root, nodes, stack } = state;
        const top = stack[stack.length - 1];
        const current = top.node;
        current.f = TrieNode_1.FLAG_WORD;
        if (!current.c) {
            // Childless word node: swap in the shared `eow` node and remove it
            // from the reference table.
            top.node = eow;
            const parent = stack[stack.length - 2].node;
            parent.c.set(top.s, eow);
            nodes.pop();
        }
        stack.pop();
        // A repeat count may follow, so parseBack handles the next character.
        return { root, nodes, stack, parser: parseBack };
    }

    const charactersBack = new Set([BACK, ...'23456789']);

    /** BACK or a repeat digit: pop the stack; anything else is re-dispatched. */
    function parseBack(state, s) {
        if (!charactersBack.has(s)) {
            return parserMain({ ...state, parser: undefined }, s);
        }
        // A digit is the total for the group; the token before it already popped one level.
        const pops = s === BACK ? 1 : parseInt(s, 10) - 1;
        const { stack } = state;
        for (let i = 0; i < pops; i++) {
            stack.pop();
        }
        return { ...state, parser: parseBack };
    }

    /** Line-break characters carry no meaning in the data section. */
    function parseIgnore(state, _) {
        return state;
    }

    const parsers = new Map([
        [EOW, parseEOW],
        [BACK, parseBack],
        [REF, parseReference],
        [ESCAPE, parseEscapeCharacter],
        [EOL, parseIgnore],
        [LF, parseIgnore],
    ]);

    /** Dispatch: the active parser wins, then the per-character table, then a plain key. */
    function parserMain(state, s) {
        let handler = state.parser;
        if (handler == null) {
            handler = parsers.get(s);
        }
        if (handler == null) {
            handler = parseCharacter;
        }
        return handler(state, s);
    }

    return parserMain;
}
|
//# sourceMappingURL=importExportV3.js.map
|