You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
282 lines
6.4 KiB
282 lines
6.4 KiB
// Project-local modules used by the tokenizer below:
//   util      - pattern pre-processing, character-class tokenizing, error reporting
//   types     - numeric constants stored in each token's `type` field
//   sets      - factories for predefined character sets (\w, \d, \s, `.` ...)
//   positions - factories for positional tokens (^, $, \b, \B)
var util = require('./util');
var types = require('./types');
var sets = require('./sets');
var positions = require('./positions');


module.exports = function(regexpStr) { |
|
var i = 0, l, c, |
|
start = { type: types.ROOT, stack: []}, |
|
|
|
// Keep track of last clause/group and stack. |
|
lastGroup = start, |
|
last = start.stack, |
|
groupStack = []; |
|
|
|
|
|
var repeatErr = function(i) { |
|
util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1)); |
|
}; |
|
|
|
// Decode a few escaped characters. |
|
var str = util.strToChars(regexpStr); |
|
l = str.length; |
|
|
|
// Iterate through each character in string. |
|
while (i < l) { |
|
c = str[i++]; |
|
|
|
switch (c) { |
|
// Handle escaped characters, inclues a few sets. |
|
case '\\': |
|
c = str[i++]; |
|
|
|
switch (c) { |
|
case 'b': |
|
last.push(positions.wordBoundary()); |
|
break; |
|
|
|
case 'B': |
|
last.push(positions.nonWordBoundary()); |
|
break; |
|
|
|
case 'w': |
|
last.push(sets.words()); |
|
break; |
|
|
|
case 'W': |
|
last.push(sets.notWords()); |
|
break; |
|
|
|
case 'd': |
|
last.push(sets.ints()); |
|
break; |
|
|
|
case 'D': |
|
last.push(sets.notInts()); |
|
break; |
|
|
|
case 's': |
|
last.push(sets.whitespace()); |
|
break; |
|
|
|
case 'S': |
|
last.push(sets.notWhitespace()); |
|
break; |
|
|
|
default: |
|
// Check if c is integer. |
|
// In which case it's a reference. |
|
if (/\d/.test(c)) { |
|
last.push({ type: types.REFERENCE, value: parseInt(c, 10) }); |
|
|
|
// Escaped character. |
|
} else { |
|
last.push({ type: types.CHAR, value: c.charCodeAt(0) }); |
|
} |
|
} |
|
|
|
break; |
|
|
|
|
|
// Positionals. |
|
case '^': |
|
last.push(positions.begin()); |
|
break; |
|
|
|
case '$': |
|
last.push(positions.end()); |
|
break; |
|
|
|
|
|
// Handle custom sets. |
|
case '[': |
|
// Check if this class is 'anti' i.e. [^abc]. |
|
var not; |
|
if (str[i] === '^') { |
|
not = true; |
|
i++; |
|
} else { |
|
not = false; |
|
} |
|
|
|
// Get all the characters in class. |
|
var classTokens = util.tokenizeClass(str.slice(i), regexpStr); |
|
|
|
// Increase index by length of class. |
|
i += classTokens[1]; |
|
last.push({ |
|
type: types.SET, |
|
set: classTokens[0], |
|
not: not, |
|
}); |
|
|
|
break; |
|
|
|
|
|
// Class of any character except \n. |
|
case '.': |
|
last.push(sets.anyChar()); |
|
break; |
|
|
|
|
|
// Push group onto stack. |
|
case '(': |
|
// Create group. |
|
var group = { |
|
type: types.GROUP, |
|
stack: [], |
|
remember: true, |
|
}; |
|
|
|
c = str[i]; |
|
|
|
// If if this is a special kind of group. |
|
if (c === '?') { |
|
c = str[i + 1]; |
|
i += 2; |
|
|
|
// Match if followed by. |
|
if (c === '=') { |
|
group.followedBy = true; |
|
|
|
// Match if not followed by. |
|
} else if (c === '!') { |
|
group.notFollowedBy = true; |
|
|
|
} else if (c !== ':') { |
|
util.error(regexpStr, |
|
'Invalid group, character \'' + c + |
|
'\' after \'?\' at column ' + (i - 1)); |
|
} |
|
|
|
group.remember = false; |
|
} |
|
|
|
// Insert subgroup into current group stack. |
|
last.push(group); |
|
|
|
// Remember the current group for when the group closes. |
|
groupStack.push(lastGroup); |
|
|
|
// Make this new group the current group. |
|
lastGroup = group; |
|
last = group.stack; |
|
break; |
|
|
|
|
|
// Pop group out of stack. |
|
case ')': |
|
if (groupStack.length === 0) { |
|
util.error(regexpStr, 'Unmatched ) at column ' + (i - 1)); |
|
} |
|
lastGroup = groupStack.pop(); |
|
|
|
// Check if this group has a PIPE. |
|
// To get back the correct last stack. |
|
last = lastGroup.options ? |
|
lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack; |
|
break; |
|
|
|
|
|
// Use pipe character to give more choices. |
|
case '|': |
|
// Create array where options are if this is the first PIPE |
|
// in this clause. |
|
if (!lastGroup.options) { |
|
lastGroup.options = [lastGroup.stack]; |
|
delete lastGroup.stack; |
|
} |
|
|
|
// Create a new stack and add to options for rest of clause. |
|
var stack = []; |
|
lastGroup.options.push(stack); |
|
last = stack; |
|
break; |
|
|
|
|
|
// Repetition. |
|
// For every repetition, remove last element from last stack |
|
// then insert back a RANGE object. |
|
// This design is chosen because there could be more than |
|
// one repetition symbols in a regex i.e. `a?+{2,3}`. |
|
case '{': |
|
var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max; |
|
if (rs !== null) { |
|
if (last.length === 0) { |
|
repeatErr(i); |
|
} |
|
min = parseInt(rs[1], 10); |
|
max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min; |
|
i += rs[0].length; |
|
|
|
last.push({ |
|
type: types.REPETITION, |
|
min: min, |
|
max: max, |
|
value: last.pop(), |
|
}); |
|
} else { |
|
last.push({ |
|
type: types.CHAR, |
|
value: 123, |
|
}); |
|
} |
|
break; |
|
|
|
case '?': |
|
if (last.length === 0) { |
|
repeatErr(i); |
|
} |
|
last.push({ |
|
type: types.REPETITION, |
|
min: 0, |
|
max: 1, |
|
value: last.pop(), |
|
}); |
|
break; |
|
|
|
case '+': |
|
if (last.length === 0) { |
|
repeatErr(i); |
|
} |
|
last.push({ |
|
type: types.REPETITION, |
|
min: 1, |
|
max: Infinity, |
|
value: last.pop(), |
|
}); |
|
break; |
|
|
|
case '*': |
|
if (last.length === 0) { |
|
repeatErr(i); |
|
} |
|
last.push({ |
|
type: types.REPETITION, |
|
min: 0, |
|
max: Infinity, |
|
value: last.pop(), |
|
}); |
|
break; |
|
|
|
|
|
// Default is a character that is not `\[](){}?+*^$`. |
|
default: |
|
last.push({ |
|
type: types.CHAR, |
|
value: c.charCodeAt(0), |
|
}); |
|
} |
|
|
|
} |
|
|
|
// Check if any groups have not been closed. |
|
if (groupStack.length !== 0) { |
|
util.error(regexpStr, 'Unterminated group'); |
|
} |
|
|
|
return start; |
|
}; |
|
|
|
// Expose the token type constants on the exported tokenizer so callers can
// compare a token's `type` field against them.
module.exports.types = types;