You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
252 lines
5.9 KiB
252 lines
5.9 KiB
'use strict';

var constants = require('./const');

// Tables from ./const; this file indexes them by char code, only for codes below 0x80
var PUNCTUATION = constants.PUNCTUATION;
var STOP_URL_RAW = constants.STOP_URL_RAW;

// Values from ./const; in this file FULLSTOP, PLUSSIGN and HYPHENMINUS are compared
// against char codes, PUNCTUATOR is compared against PUNCTUATION entries
var TYPE = constants.TYPE;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var PUNCTUATOR = TYPE.Punctuator;

// Char codes
var TAB = 9;          // '\t'
var N = 10;           // '\n'
var F = 12;           // '\f'
var R = 13;           // '\r'
var SPACE = 32;       // ' '
var BACK_SLASH = 92;  // '\\'
var E = 101;          // 'e'.charCodeAt(0)
|
|
|
// Returns the offset of the first meaningful char in `source`:
// 1 when the source starts with a byte order mark, 0 otherwise.
// See https://en.wikipedia.org/wiki/Byte_order_mark
function firstCharOffset(source) {
    var first = source.charCodeAt(0); // NaN for an empty source, matches nothing below

    if (first === 0xFEFF || // BOM
        first === 0xFFFE) { // BOM read with swapped byte order
        return 1;
    }

    return 0;
}
|
|
|
// Tests whether a char code is an ASCII hexadecimal digit (0-9, A-F, a-f).
function isHex(code) {
    return (code >= 48 && code <= 57) || // 0 .. 9
           (code >= 65 && code <= 70) || // A .. F
           (code >= 97 && code <= 102);  // a .. f
}
|
|
|
// Tests whether a char code is an ASCII decimal digit (0-9).
function isNumber(code) {
    return code >= 48 && code <= 57; // 0 .. 9
}
|
|
|
// Tests whether a char code is a space, a tab or a newline (as defined by isNewline).
function isWhiteSpace(code) {
    return code === SPACE || code === TAB || isNewline(code);
}
|
|
|
// Tests whether a char code is a newline char: \r, \n or \f.
function isNewline(code) {
    return code === R || code === N || code === F;
}
|
|
|
// Returns the length of the newline sequence located at `offset`.
//   source - text being scanned
//   offset - position of the char whose code is `code`
//   code   - char code at `offset`
// Result: 2 for \r\n, 1 for any other newline char, 0 when `code` is not a newline.
function getNewlineLength(source, offset, code) {
    if (!isNewline(code)) {
        return 0;
    }

    // \r immediately followed by \n counts as a single newline of two chars
    if (code === R && offset + 1 < source.length && source.charCodeAt(offset + 1) === N) {
        return 2;
    }

    return 1;
}
|
|
|
// Compares the char at `offset` of `testStr` with `referenceCode`, ignoring the
// case of ASCII letters in `testStr`. The tested char is lowercased before the
// comparison, so `referenceCode` has to be the code of a lowercase letter to
// match a letter. Returns false when `offset` is outside of the string.
function cmpChar(testStr, offset, referenceCode) {
    var code = testStr.charCodeAt(offset);

    // code.toLowerCase() for A..Z
    if (code >= 65 && code <= 90) {
        code = code | 32;
    }

    return code === referenceCode;
}
|
|
|
// Compares the substring [start, end) of `testStr` with `referenceStr`, ignoring
// the case of ASCII letters in `testStr` only; `referenceStr` is compared as is,
// so it has to be lowercase to match letters.
// Returns false when the lengths differ or the range is outside of `testStr`.
function cmpStr(testStr, start, end, referenceStr) {
    if (end - start !== referenceStr.length) {
        return false;
    }

    if (start < 0 || end > testStr.length) {
        return false;
    }

    for (var i = start; i < end; i++) {
        var testCode = testStr.charCodeAt(i);
        var refCode = referenceStr.charCodeAt(i - start);

        // testCode.toLowerCase() for A..Z
        if (testCode >= 65 && testCode <= 90) {
            testCode = testCode | 32;
        }

        if (testCode !== refCode) {
            return false;
        }
    }

    return true;
}
|
|
|
// Scans backward from `offset` (inclusive) over whitespace chars and returns the
// offset of the first char of that whitespace run. When the char at `offset` is
// not whitespace the result is `offset + 1`.
function findWhiteSpaceStart(source, offset) {
    while (offset >= 0 && isWhiteSpace(source.charCodeAt(offset))) {
        offset--;
    }

    return offset + 1;
}
|
|
|
// Scans forward from `offset` over whitespace chars and returns the offset of the
// first non-whitespace char, or the source length when the whitespace runs to the end.
function findWhiteSpaceEnd(source, offset) {
    while (offset < source.length && isWhiteSpace(source.charCodeAt(offset))) {
        offset++;
    }

    return offset;
}
|
|
|
// Returns the offset right after the first `*/` found at or after `offset`.
// An unclosed comment extends to the end of the source.
function findCommentEnd(source, offset) {
    var commentEnd = source.indexOf('*/', offset);

    if (commentEnd === -1) {
        return source.length;
    }

    return commentEnd + 2;
}
|
|
|
// Finds the end of a quoted string.
//   source - text being scanned
//   offset - position of the first char after the opening quote
//   quote  - char code of the quote that closes the string
// Returns the offset right after the closing quote, or the source length when
// the string is not closed. The result never exceeds the source length.
function findStringEnd(source, offset, quote) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        // TODO: bad string
        if (code === BACK_SLASH) {
            // skip the escaped char, so an escaped quote doesn't close the string
            offset++;
        } else if (code === quote) {
            offset++;
            break;
        }
    }

    // a backslash as the very last char moves the offset one past the end
    return Math.min(offset, source.length);
}
|
|
|
// Scans forward from `offset` over decimal digits and returns the offset of the
// first non-digit char, or the source length when the digits run to the end.
function findDecimalNumberEnd(source, offset) {
    while (offset < source.length && isNumber(source.charCodeAt(offset))) {
        offset++;
    }

    return offset;
}
|
|
|
// Finds the end of a number that starts at `offset`.
//   source        - text being scanned
//   offset        - position where the integer part starts
//   allowFraction - when truthy, a `.\d+` part is consumed as well
// An exponent `e[+-]?\d+` (case insensitive `e`) is consumed whenever it is
// followed by at least one digit. Returns the offset right after the number.
function findNumberEnd(source, offset, allowFraction) {
    var code;

    offset = findDecimalNumberEnd(source, offset);

    // fraction: .\d+
    if (allowFraction && offset + 1 < source.length && source.charCodeAt(offset) === FULLSTOP) {
        code = source.charCodeAt(offset + 1);

        // a full stop is a part of the number only when a digit follows it
        if (isNumber(code)) {
            offset = findDecimalNumberEnd(source, offset + 1);
        }
    }

    // exponent: e[+-]\d+
    if (offset + 1 < source.length) {
        if ((source.charCodeAt(offset) | 32) === E) { // case insensitive check for `e`
            code = source.charCodeAt(offset + 1);

            // look through an optional sign to the char that must be a digit
            if (code === PLUSSIGN || code === HYPHENMINUS) {
                if (offset + 2 < source.length) {
                    code = source.charCodeAt(offset + 2);
                }
            }

            if (isNumber(code)) {
                // offset + 2 is either the digit after the sign or the char after
                // the first exponent digit, which has been checked already
                offset = findDecimalNumberEnd(source, offset + 2);
            }
        }
    }

    return offset;
}
|
|
|
// Skips an escape sequence; `offset` points to the first char after the backslash.
// A unicode escape is one to six hex digits that may be followed by a single
// whitespace, which belongs to the escape:
// [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
// Returns the offset of the LAST char of the escape, so a caller advances by one
// to get past it. When the char at `offset` is not a hex digit, or `offset` is
// at the end of the source, `offset` is returned unchanged.
function findEscapeEnd(source, offset) {
    var hexCount = 0;

    // count leading hex digits, six at most, never reading past the end of the source
    while (hexCount < 6 &&
           offset + hexCount < source.length &&
           isHex(source.charCodeAt(offset + hexCount))) {
        hexCount++;
    }

    // not a unicode escape: the char at `offset` is the escaped char itself
    if (hexCount === 0) {
        return offset;
    }

    // the last hex digit; this is the result when the source ends right after the digits
    var end = offset + hexCount - 1;

    // a single whitespace after the digits terminates the escape and is a part of it
    if (end + 1 < source.length) {
        var code = source.charCodeAt(end + 1);

        if (code === SPACE || code === TAB) {
            end++;
        } else {
            end += getNewlineLength(source, end + 1, code);
        }
    }

    return end;
}
|
|
|
// Finds the end of an identifier that starts at `offset`.
// Scanning stops at the first char with a code below 0x80 whose PUNCTUATION entry
// is PUNCTUATOR; escape sequences are skipped as a whole with findEscapeEnd.
// Returns the offset of the stop char, or the source length when there is none.
// The result never exceeds the source length.
function findIdentifierEnd(source, offset) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        if (code === BACK_SLASH) {
            // findEscapeEnd returns the last char of the escape, the loop steps over it
            offset = findEscapeEnd(source, offset + 1);
        } else if (code < 0x80 && PUNCTUATION[code] === PUNCTUATOR) {
            break;
        }
    }

    // a backslash as the very last char moves the offset one past the end
    return Math.min(offset, source.length);
}
|
|
|
// Finds the end of a raw (unquoted) url that starts at `offset`.
// Scanning stops at the first char with a code below 0x80 that is marked with 1
// in STOP_URL_RAW; escape sequences are skipped as a whole with findEscapeEnd.
// Returns the offset of the stop char, or the source length when there is none.
// The result never exceeds the source length.
function findUrlRawEnd(source, offset) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        if (code === BACK_SLASH) {
            // findEscapeEnd returns the last char of the escape, the loop steps over it
            offset = findEscapeEnd(source, offset + 1);
        } else if (code < 0x80 && STOP_URL_RAW[code] === 1) {
            break;
        }
    }

    // a backslash as the very last char moves the offset one past the end
    return Math.min(offset, source.length);
}
|
|
|
module.exports = { |
|
firstCharOffset: firstCharOffset, |
|
|
|
isHex: isHex, |
|
isNumber: isNumber, |
|
isWhiteSpace: isWhiteSpace, |
|
isNewline: isNewline, |
|
getNewlineLength: getNewlineLength, |
|
|
|
cmpChar: cmpChar, |
|
cmpStr: cmpStr, |
|
|
|
findWhiteSpaceStart: findWhiteSpaceStart, |
|
findWhiteSpaceEnd: findWhiteSpaceEnd, |
|
findCommentEnd: findCommentEnd, |
|
findStringEnd: findStringEnd, |
|
findDecimalNumberEnd: findDecimalNumberEnd, |
|
findNumberEnd: findNumberEnd, |
|
findEscapeEnd: findEscapeEnd, |
|
findIdentifierEnd: findIdentifierEnd, |
|
findUrlRawEnd: findUrlRawEnd |
|
};
|
|
|