You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
631 lines
20 KiB
631 lines
20 KiB
'use strict'; |
|
|
|
var CssSyntaxError = require('./error'); |
|
|
|
var constants = require('./const'); |
|
var TYPE = constants.TYPE; |
|
var NAME = constants.NAME; |
|
var SYMBOL_TYPE = constants.SYMBOL_TYPE; |
|
|
|
var utils = require('./utils'); |
|
var firstCharOffset = utils.firstCharOffset; |
|
var cmpStr = utils.cmpStr; |
|
var isNumber = utils.isNumber; |
|
var findWhiteSpaceStart = utils.findWhiteSpaceStart; |
|
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd; |
|
var findCommentEnd = utils.findCommentEnd; |
|
var findStringEnd = utils.findStringEnd; |
|
var findNumberEnd = utils.findNumberEnd; |
|
var findIdentifierEnd = utils.findIdentifierEnd; |
|
var findUrlRawEnd = utils.findUrlRawEnd; |
|
|
|
// Type value returned by the lookup methods when there is no token to report
var NULL = 0;

// Layout of an `offsetAndType` entry: `(type << TYPE_SHIFT) | endOffset`
var TYPE_SHIFT = 24;
var OFFSET_MASK = (1 << TYPE_SHIFT) - 1; // 0x00FFFFFF, the bits below TYPE_SHIFT

// Lower bound for the size of the line/column lookup buffers
var MIN_BUFFER_SIZE = 1024 * 16;

// Use a plain Array when typed arrays are not available
var SafeUint32Array = typeof Uint32Array === 'undefined' ? Array : Uint32Array;

// Char codes of the characters treated as line breaks
var N = 10; // \n
var F = 12; // \f
var R = 13; // \r

// Multi-character token types
var WHITESPACE = TYPE.WhiteSpace;
var IDENTIFIER = TYPE.Identifier;
var NUMBER = TYPE.Number;
var STRING = TYPE.String;
var COMMENT = TYPE.Comment;
var PUNCTUATOR = TYPE.Punctuator;
var CDO = TYPE.CDO;
var CDC = TYPE.CDC;
var ATRULE = TYPE.Atrule;
var FUNCTION = TYPE.Function;
var URL = TYPE.Url;
var RAW = TYPE.Raw;

// Single-character token types; the tokenizer compares these directly
// against char codes read from the source
var STAR = TYPE.Asterisk;
var SLASH = TYPE.Solidus;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var GREATERTHANSIGN = TYPE.GreaterThanSign;
var LESSTHANSIGN = TYPE.LessThanSign;
var EXCLAMATIONMARK = TYPE.ExclamationMark;
var COMMERCIALAT = TYPE.CommercialAt;
var QUOTATIONMARK = TYPE.QuotationMark;
var APOSTROPHE = TYPE.Apostrophe;

// Bracket pairs tracked by the balance table
var LEFTPARENTHESIS = TYPE.LeftParenthesis;
var RIGHTPARENTHESIS = TYPE.RightParenthesis;
var LEFTCURLYBRACKET = TYPE.LeftCurlyBracket;
var RIGHTCURLYBRACKET = TYPE.RightCurlyBracket;
var LEFTSQUAREBRACKET = TYPE.LeftSquareBracket;
var RIGHTSQUAREBRACKET = TYPE.RightSquareBracket;
|
|
|
/**
 * Builds the offset -> line and offset -> column lookup tables for `source`
 * and stores them on `tokenizer` (`lines`, `columns`), then marks them as
 * computed via `tokenizer.linesAnsColumnsComputed`.
 *
 * Counting starts from `tokenizer.startLine` / `tokenizer.startColumn` at the
 * offset returned by `firstCharOffset(source)`. `\n`, `\r`, `\f` and the pair
 * `\r\n` each end a line. One extra entry is written for the offset right
 * after the last character.
 *
 * @param {Object} tokenizer Tokenizer instance whose tables are (re)filled
 * @param {string} source Text to index
 */
function computeLinesAndColumns(tokenizer, source) {
    var length = source.length;
    var pos = firstCharOffset(source);
    var lineTable = tokenizer.lines;
    var columnTable = tokenizer.columns;
    var currentLine = tokenizer.startLine;
    var currentColumn = tokenizer.startColumn;
    var ch;

    // existing buffers are reused when they can hold length + 1 entries
    if (lineTable === null || lineTable.length < length + 1) {
        lineTable = new SafeUint32Array(Math.max(length + 1024, MIN_BUFFER_SIZE));
        columnTable = new SafeUint32Array(lineTable.length);
    }

    while (pos < length) {
        ch = source.charCodeAt(pos);

        lineTable[pos] = currentLine;
        columnTable[pos] = currentColumn;
        currentColumn++;

        if (ch === N || ch === R || ch === F) {
            // \r\n is a single line break: the \n stays on the same line
            if (ch === R && pos + 1 < length && source.charCodeAt(pos + 1) === N) {
                pos++;
                lineTable[pos] = currentLine;
                columnTable[pos] = currentColumn;
            }

            currentLine++;
            currentColumn = 1;
        }

        pos++;
    }

    // position right after the last character
    lineTable[pos] = currentLine;
    columnTable[pos] = currentColumn;

    tokenizer.linesAnsColumnsComputed = true;
    tokenizer.lines = lineTable;
    tokenizer.columns = columnTable;
}
|
|
|
/**
 * Splits `source` into tokens starting at `startPos` and stores the result
 * on `tokenizer`:
 *
 * - `offsetAndType`: one entry per token packed as
 *   `(type << TYPE_SHIFT) | endOffset`; the entry following the last token
 *   holds the final offset only.
 * - `balance`: one entry per token. A bracket token with a partner holds the
 *   index of that partner; any other token located inside a closed bracket
 *   pair holds the index of the closing token; everything else holds
 *   `source.length`.
 * - `tokenCount`: number of tokens produced.
 *
 * Existing buffers are reused when they can hold `source.length + 1` entries.
 *
 * @param {Object} tokenizer Tokenizer instance receiving the layout
 * @param {string} source Text to tokenize
 * @param {number} startPos Offset of the first character to tokenize
 */
function tokenLayout(tokenizer, source, startPos) {
    var length = source.length;
    var packed = tokenizer.offsetAndType;
    var balance = tokenizer.balance;
    var count = 0;      // number of tokens written so far
    var lastType = 0;   // type of the previously written token
    var pos = startPos;
    var mark = 0;       // start of the latest identifier; reused as the end of whitespace after `url(`
    var closeCode = 0;  // char code that closes the innermost open bracket
    var openState = 0;  // (closeCode << TYPE_SHIFT) | index of the innermost open bracket token
    var openIndex = 0;
    var code;
    var type;

    if (packed === null || packed.length < length + 1) {
        packed = new SafeUint32Array(length + 1024);
        balance = new SafeUint32Array(length + 1024);
    }

    while (pos < length) {
        code = source.charCodeAt(pos);
        type = code < 0x80 ? SYMBOL_TYPE[code] : IDENTIFIER;

        balance[count] = length;

        switch (type) {
            case WHITESPACE:
                pos = findWhiteSpaceEnd(source, pos + 1);
                break;

            case PUNCTUATOR:
                // bracket balance bookkeeping
                switch (code) {
                    case closeCode:
                        // link the closing token with its opening token and
                        // restore the state of the enclosing bracket
                        openIndex = openState & OFFSET_MASK;
                        openState = balance[openIndex];
                        closeCode = openState >> TYPE_SHIFT;
                        balance[count] = openIndex;
                        balance[openIndex] = count;
                        openIndex++;

                        // tokens in between that have no link yet point to the closing token
                        while (openIndex < count) {
                            if (balance[openIndex] === length) {
                                balance[openIndex] = count;
                            }
                            openIndex++;
                        }
                        break;

                    case LEFTSQUAREBRACKET:
                    case LEFTCURLYBRACKET:
                    case LEFTPARENTHESIS:
                        // remember the enclosing state in the opening token's slot
                        balance[count] = openState;
                        closeCode = code === LEFTSQUAREBRACKET
                            ? RIGHTSQUAREBRACKET
                            : code === LEFTCURLYBRACKET
                                ? RIGHTCURLYBRACKET
                                : RIGHTPARENTHESIS;
                        openState = (closeCode << TYPE_SHIFT) | count;
                        break;
                }

                if (code === STAR && lastType === SLASH) {
                    // /*
                    type = COMMENT;
                    pos = findCommentEnd(source, pos + 1);
                    count--; // overwrite the `/` token
                } else if (
                    code === FULLSTOP &&
                    (lastType === PLUSSIGN || lastType === HYPHENMINUS) &&
                    pos + 1 < length &&
                    isNumber(source.charCodeAt(pos + 1))
                ) {
                    // -.123 and +.123
                    type = NUMBER;
                    pos = findNumberEnd(source, pos + 2, false);
                    count--; // overwrite the sign token
                } else if (
                    code === EXCLAMATIONMARK &&
                    lastType === LESSTHANSIGN &&
                    pos + 2 < length &&
                    source.charCodeAt(pos + 1) === HYPHENMINUS &&
                    source.charCodeAt(pos + 2) === HYPHENMINUS
                ) {
                    // <!--
                    type = CDO;
                    pos += 3;
                    count--; // overwrite the `<` token
                } else if (
                    code === HYPHENMINUS &&
                    lastType === HYPHENMINUS &&
                    pos + 1 < length &&
                    source.charCodeAt(pos + 1) === GREATERTHANSIGN
                ) {
                    // -->
                    type = CDC;
                    pos += 2;
                    count--; // overwrite the first `-` token
                } else if (code === LEFTPARENTHESIS && lastType === IDENTIFIER) {
                    // ident(
                    pos += 1;
                    count--; // overwrite the identifier token
                    // the bracket now lives one slot earlier: move its saved
                    // state and the index stored in openState accordingly
                    balance[count] = balance[count + 1];
                    openState--;

                    // 4 chars long and equal to `url(` (compared through cmpStr)
                    if (pos - mark === 4 && cmpStr(source, mark, pos, 'url(')) {
                        // url() gets special treatment because its content
                        // can be almost any sequence of symbols
                        mark = findWhiteSpaceEnd(source, pos);
                        code = source.charCodeAt(mark);

                        if (code === LEFTPARENTHESIS ||
                            code === RIGHTPARENTHESIS ||
                            code === QUOTATIONMARK ||
                            code === APOSTROPHE) {
                            type = URL;
                        } else {
                            // url(
                            packed[count++] = (URL << TYPE_SHIFT) | pos;
                            balance[count] = length;

                            // ws*
                            if (mark !== pos) {
                                packed[count++] = (WHITESPACE << TYPE_SHIFT) | mark;
                                balance[count] = length;
                            }

                            // raw
                            type = RAW;
                            pos = findUrlRawEnd(source, mark);
                        }
                    } else {
                        type = FUNCTION;
                    }
                } else {
                    // plain punctuator: its type is its char code
                    type = code;
                    pos += 1;
                }
                break;

            case NUMBER:
                pos = findNumberEnd(source, pos + 1, lastType !== FULLSTOP);

                // a preceding dot, dash or plus becomes part of the number
                if (lastType === FULLSTOP ||
                    lastType === HYPHENMINUS ||
                    lastType === PLUSSIGN) {
                    count--;
                }
                break;

            case STRING:
                pos = findStringEnd(source, pos + 1, code);
                break;

            default:
                mark = pos;
                pos = findIdentifierEnd(source, pos);

                // a preceding dash becomes part of the identifier
                if (lastType === HYPHENMINUS) {
                    count--;
                    // look one token further back to support @-prefix-ident
                    lastType = count === 0 ? 0 : packed[count - 1] >> TYPE_SHIFT;
                }

                // a preceding `@` turns the identifier into an at-keyword
                if (lastType === COMMERCIALAT) {
                    count--;
                    type = ATRULE;
                }
        }

        packed[count++] = (type << TYPE_SHIFT) | pos;
        lastType = type;
    }

    // terminate the tables
    packed[count] = pos;
    balance[count] = length;
    balance[length] = length; // prevents false positive balance match with any token

    // brackets that were never closed have no partner
    while (openState !== 0) {
        openIndex = openState & OFFSET_MASK;
        openState = balance[openIndex];
        balance[openIndex] = length;
    }

    tokenizer.offsetAndType = packed;
    tokenizer.tokenCount = count;
    tokenizer.balance = balance;
}
|
|
|
// |
|
// tokenizer |
|
// |
|
|
|
/**
 * Token stream over a source string with a cursor on the current token.
 *
 * The source is tokenized immediately (see `setSource`). The current token is
 * described by `currentToken` (index), `tokenType`, `tokenStart` and
 * `tokenEnd` (offsets in `source`); `eof` becomes true once the cursor has
 * moved past the last token.
 *
 * @param {*} source Text to tokenize; falsy values are treated as an empty string
 * @param {number} [startOffset=0] Value added to offsets in reported locations
 * @param {number} [startLine=1] Line number of the first character
 * @param {number} [startColumn=1] Column number of the first character
 */
var Tokenizer = function(source, startOffset, startLine, startColumn) {
    // buffers are created lazily and reused across setSource() calls
    this.offsetAndType = null;
    this.balance = null;
    this.lines = null;
    this.columns = null;

    this.setSource(source, startOffset, startLine, startColumn);
};

Tokenizer.prototype = {
    /**
     * Replaces the source, tokenizes it and moves the cursor to the first token.
     * Line/column tables are invalidated and rebuilt on the next location request.
     *
     * @param {*} source Text to tokenize; falsy values are treated as an empty string
     * @param {number} [startOffset=0]
     * @param {number} [startLine=1]
     * @param {number} [startColumn=1]
     */
    setSource: function(source, startOffset, startLine, startColumn) {
        var safeSource = String(source || '');
        var start = firstCharOffset(safeSource);

        this.source = safeSource;
        this.firstCharOffset = start;
        this.startOffset = typeof startOffset === 'undefined' ? 0 : startOffset;
        this.startLine = typeof startLine === 'undefined' ? 1 : startLine;
        this.startColumn = typeof startColumn === 'undefined' ? 1 : startColumn;
        this.linesAnsColumnsComputed = false;

        this.eof = false;
        this.currentToken = -1;
        this.tokenType = 0;
        this.tokenStart = start;
        this.tokenEnd = start;

        tokenLayout(this, safeSource, start);
        this.next();
    },

    /**
     * Returns the type of the token `offset` positions away from the current
     * one, or NULL (0) when that position is past the last token.
     */
    lookupType: function(offset) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return NULL;
    },

    /**
     * Returns the type of the first non-whitespace token found at or after
     * the position `offset` tokens away from the current one, or NULL (0)
     * when there is none.
     */
    lookupNonWSType: function(offset) {
        offset += this.currentToken;

        for (var type; offset < this.tokenCount; offset++) {
            type = this.offsetAndType[offset] >> TYPE_SHIFT;

            if (type !== WHITESPACE) {
                return type;
            }
        }

        return NULL;
    },

    /**
     * Compares, via `cmpStr`, the text of the token `offset` positions away
     * from the current one with `referenceStr`. Returns false when that
     * position is past the last token.
     */
    lookupValue: function(offset, referenceStr) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                // the very first token has no predecessor to take the start
                // from; it starts at the first significant character
                offset > 0
                    ? this.offsetAndType[offset - 1] & OFFSET_MASK
                    : this.firstCharOffset,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },

    /**
     * Returns the start offset of the token with index `tokenNum`. Indexes at
     * or past the token count yield the final offset of the token layout;
     * indexes of zero or less yield the offset of the first character.
     */
    getTokenStart: function(tokenNum) {
        if (tokenNum === this.currentToken) {
            return this.tokenStart;
        }

        if (tokenNum > 0) {
            return tokenNum < this.tokenCount
                ? this.offsetAndType[tokenNum - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },

    /**
     * Returns the start offset of the current token, or, when the previous
     * token is whitespace, the start offset of that whitespace token.
     */
    getOffsetExcludeWS: function() {
        if (this.currentToken > 0) {
            if ((this.offsetAndType[this.currentToken - 1] >> TYPE_SHIFT) === WHITESPACE) {
                return this.currentToken > 1
                    ? this.offsetAndType[this.currentToken - 2] & OFFSET_MASK
                    : this.firstCharOffset;
            }
        }

        return this.tokenStart;
    },

    /**
     * Scans forward from token index `startToken` until a stop condition is
     * met and returns the distance, in tokens, from the CURRENT token to the
     * stop position.
     *
     * Scanning stops at a token of type `endTokenType1` or `endTokenType2`,
     * or at a token whose balance entry points before `startToken`. Balanced
     * bracket blocks are jumped over without inspecting their content.
     *
     * @param {number} startToken Index of the token to start from
     * @param {number} endTokenType1 Stop type; the stop token is not included
     * @param {number} endTokenType2 Stop type; included only if `includeTokenType2`
     * @param {boolean} includeTokenType2
     * @returns {number}
     */
    getRawLength: function(startToken, endTokenType1, endTokenType2, includeTokenType2) {
        var cursor = startToken;
        var balanceEnd;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // balance end points to a token before the start
            if (balanceEnd < startToken) {
                break loop;
            }

            // check whether the token is of a stop type
            switch (this.offsetAndType[cursor] >> TYPE_SHIFT) {
                case endTokenType1:
                    break loop;

                case endTokenType2:
                    if (includeTokenType2) {
                        cursor++;
                    }
                    break loop;

                default:
                    // fast forward to the end of a balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        return cursor - this.currentToken;
    },

    /**
     * Returns true when the balance entry of the current token is less than `pos`.
     */
    isBalanceEdge: function(pos) {
        var balanceStart = this.balance[this.currentToken];
        return balanceStart < pos;
    },

    /**
     * Returns the source text of the current token.
     */
    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },

    /**
     * Returns the source text between `start` and the start of the current token.
     */
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    /**
     * Moves the cursor past consecutive whitespace tokens, starting with the
     * current one.
     */
    skipWS: function() {
        for (var i = this.currentToken, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },

    /**
     * Moves the cursor past consecutive whitespace and comment tokens.
     */
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },

    /**
     * Moves the cursor `tokenCount` tokens forward. Moving past the last
     * token puts the tokenizer into the end-of-input state (see `next`).
     */
    skip: function(tokenCount) {
        var next = this.currentToken + tokenCount;

        if (next < this.tokenCount) {
            this.currentToken = next;
            // the very first token has no predecessor to take the start
            // from; it starts at the first significant character
            this.tokenStart = next > 0
                ? this.offsetAndType[next - 1] & OFFSET_MASK
                : this.firstCharOffset;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.next();
        }
    },

    /**
     * Moves the cursor to the next token. Past the last token it sets `eof`,
     * resets `tokenType` to NULL (0) and points `tokenStart`/`tokenEnd` at
     * the end of the source.
     */
    next: function() {
        var next = this.currentToken + 1;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.eof = true;
            this.tokenType = NULL;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    /**
     * Moves to the next token if the current one is of type `tokenType`;
     * otherwise reports an error through `this.error()`, which throws.
     */
    eat: function(tokenType) {
        if (this.tokenType !== tokenType) {
            var offset = this.tokenStart;
            var message = NAME[tokenType] + ' is expected';

            // tweak message and offset
            if (tokenType === IDENTIFIER) {
                // an identifier is expected but there is a function or url
                if (this.tokenType === FUNCTION || this.tokenType === URL) {
                    offset = this.tokenEnd - 1;
                    message += ' but function found';
                }
            } else {
                // when the expected type is part of another token, report the
                // position after it: eat(HYPHENMINUS) fails on "-foo", but
                // pointing at "-" would be odd
                if (this.source.charCodeAt(this.tokenStart) === tokenType) {
                    offset = offset + 1;
                }
            }

            this.error(message, offset);
        }

        this.next();
    },

    /**
     * Skips whitespace tokens, then behaves like `eat(tokenType)`.
     */
    eatNonWS: function(tokenType) {
        this.skipWS();
        this.eat(tokenType);
    },

    /**
     * Returns the text of the current token and moves past it; the token
     * must be of type `tokenType` (see `eat`).
     */
    consume: function(tokenType) {
        var value = this.getTokenValue();

        this.eat(tokenType);

        return value;
    },

    /**
     * Returns the text of the current token without its last character and
     * moves past it; the token must be of the function type (see `eat`).
     */
    consumeFunctionName: function() {
        var name = this.source.substring(this.tokenStart, this.tokenEnd - 1);

        this.eat(FUNCTION);

        return name;
    },

    /**
     * Skips whitespace tokens, then behaves like `consume(tokenType)`.
     */
    consumeNonWS: function(tokenType) {
        this.skipWS();

        return this.consume(tokenType);
    },

    /**
     * Moves to the next token if the current one is an identifier that
     * matches `name` according to `cmpStr`; otherwise reports an error
     * through `this.error()`, which throws.
     */
    expectIdentifier: function(name) {
        if (this.tokenType !== IDENTIFIER || cmpStr(this.source, this.tokenStart, this.tokenEnd, name) === false) {
            this.error('Identifier `' + name + '` is expected');
        }

        this.next();
    },

    /**
     * Returns `{ source, offset, line, column }` for an offset in the source.
     * Line/column tables are computed on first use after `setSource`.
     *
     * @param {number} offset Offset in the source string
     * @param {string} [filename] Stored as `source` in the result
     */
    getLocation: function(offset, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            offset: this.startOffset + offset,
            line: this.lines[offset],
            column: this.columns[offset]
        };
    },

    /**
     * Returns `{ source, start, end }` where `start` and `end` are
     * `{ offset, line, column }` for the given offsets in the source.
     * Line/column tables are computed on first use after `setSource`.
     *
     * @param {number} start Offset in the source string
     * @param {number} end Offset in the source string
     * @param {string} [filename] Stored as `source` in the result
     */
    getLocationRange: function(start, end, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            start: {
                offset: this.startOffset + start,
                line: this.lines[start],
                column: this.columns[start]
            },
            end: {
                offset: this.startOffset + end,
                line: this.lines[end],
                column: this.columns[end]
            }
        };
    },

    /**
     * Throws a CssSyntaxError.
     *
     * The reported position is `offset` when it is given and lies inside the
     * source. Otherwise, at end of input it is the position returned by
     * `findWhiteSpaceStart` for the last character; in any other case it is
     * the start of the current token.
     *
     * @param {string} [message='Unexpected input']
     * @param {number} [offset]
     * @throws {CssSyntaxError} always
     */
    error: function(message, offset) {
        var location = typeof offset !== 'undefined' && offset < this.source.length
            ? this.getLocation(offset)
            : this.eof
                ? this.getLocation(findWhiteSpaceStart(this.source, this.source.length - 1))
                : this.getLocation(this.tokenStart);

        throw new CssSyntaxError(
            message || 'Unexpected input',
            this.source,
            location.offset,
            location.line,
            location.column
        );
    },

    /**
     * Returns an array with one `{ idx, type, chunk, balance }` record per
     * token: its index, type name (looked up in `NAME`), source text and
     * balance entry.
     */
    dump: function() {
        // the first token starts at the first significant character
        var offset = this.firstCharOffset;

        return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
            var start = offset;
            var end = item & OFFSET_MASK;

            offset = end;

            return {
                idx: idx,
                type: NAME[item >> TYPE_SHIFT],
                chunk: this.source.substring(start, end),
                balance: this.balance[idx]
            };
        }, this);
    }
};
|
|
|
// expose the error class as a static member
Tokenizer.CssSyntaxError = CssSyntaxError;

// copy the constants first, then the helpers from utils, onto Tokenizer
// as static members (a name present in both ends up with the utils value)
[constants, utils].forEach(function(members) {
    Object.keys(members).forEach(function(name) {
        Tokenizer[name] = members[name];
    });
});

// warm up the tokenizer to eliminate code branches that never execute and
// to fix soft deoptimizations (insufficient type feedback)
new Tokenizer('\n\r\r\n\f<!---->//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);func();+1.2e3 -.4e-5 .6e+7}').getLocation();

module.exports = Tokenizer;
|
|
|