补上篇, 继续写等式右侧的解析.
parseMaybeAssign
解析变量赋值中等式右边的语句.
首先使用 `parseMaybeConditional` 解析出一个节点作为左侧表达式, 然后判断当前 token 是否为赋值运算符 (`=`, `+=`, `-=` 等, 即 `this.type.isAssign`). 如果是, 则将 operator 设置为当前 token 的值, 然后递归调用 `parseMaybeAssign` 解析运算符右侧对应的 node, 设置为 right.
javascript
/**
 * Parse an assignment expression such as `a = b`, `a += b` or `x -= y`.
 * The left-hand side is parsed with `parseMaybeConditional`, so a conditional
 * expression (`a ? b : c`) or an operator expression is accepted as well; when
 * no assignment operator follows, that left-hand expression is returned as is.
 * In a generator, a leading contextual `yield` is handed to `parseYield`.
 * @param {string | boolean} forInit - forwarded to the sub-parsers; compared with "await" to set `potentialArrowInForAwait`
 * @param {*} refDestructuringErrors - error-position record whose fields (parenthesizedAssign, trailingComma, doubleProto, shorthandAssign) are read and reset below
 * @param {Function} afterLeftParse - optional hook called as `afterLeftParse.call(this, left, startPos, startLoc)`; its return value replaces the parsed left-hand side
 * @returns the finished AssignmentExpression node, or the left-hand expression when no assignment operator follows
 */
pp.parseMaybeAssign = function(forInit, refDestructuringErrors, afterLeftParse) {
if (this.isContextual("yield")) {
if (this.inGenerator) return this.parseYield(forInit)
// The tokenizer will assume an expression is allowed after
// `yield`, but this isn't that kind of yield
else this.exprAllowed = false
}
// NOTE(review): the excerpt elides code here ("..."). The names
// ownDestructuringErrors, oldParenAssign, oldTrailingComma and oldDoubleProto
// used further down are not declared in the visible code; presumably they are
// set up in the elided part - confirm against the full source.
...
let startPos = this.start, startLoc = this.startLoc
// When the expression starts with `(` or a name, remember the position as a
// possible arrow-function start.
if (this.type === tt.parenL || this.type === tt.name) {
this.potentialArrowAt = this.start
this.potentialArrowInForAwait = forInit === "await"
}
let left = this.parseMaybeConditional(forInit, refDestructuringErrors)
if (afterLeftParse) left = afterLeftParse.call(this, left, startPos, startLoc)
// An assignment operator follows: build the assignment node around `left`.
if (this.type.isAssign) {
let node = this.startNodeAt(startPos, startLoc)
node.operator = this.value
// Only plain `=` passes the left-hand side through toAssignable.
if (this.type === tt.eq)
left = this.toAssignable(left, false, refDestructuringErrors)
if (!ownDestructuringErrors) {
refDestructuringErrors.parenthesizedAssign = refDestructuringErrors.trailingComma = refDestructuringErrors.doubleProto = -1
}
if (refDestructuringErrors.shorthandAssign >= left.start)
refDestructuringErrors.shorthandAssign = -1 // reset because shorthand default was used correctly
// Plain `=` validates the target with checkLValPattern; every other
// assignment operator uses checkLValSimple.
if (this.type === tt.eq)
this.checkLValPattern(left)
else
this.checkLValSimple(left)
node.left = left
this.next()
// The right-hand side is itself parsed as a (possibly nested) assignment.
node.right = this.parseMaybeAssign(forInit)
if (oldDoubleProto > -1) refDestructuringErrors.doubleProto = oldDoubleProto
return this.finishNode(node, NodeTypes.AssignmentExpression)
} else {
if (ownDestructuringErrors) this.checkExpressionErrors(refDestructuringErrors, true)
}
// No assignment: put back the saved positions (when they were set) and
// return the left-hand expression.
if (oldParenAssign > -1) refDestructuringErrors.parenthesizedAssign = oldParenAssign
if (oldTrailingComma > -1) refDestructuringErrors.trailingComma = oldTrailingComma
return left
}
parseMaybeConditional
解析三元表达式. 首先使用 `parseExprOps` 解析表达式; 其次, 如果读到了 `?`, 则递归调用 `parseMaybeAssign` 分别解析 `?` 之后和 `:` 之后的两个表达式 (consequent 和 alternate), 并且返回 `ConditionalExpression` 节点.
javascript
/**
 * Parse a conditional (ternary) expression `test ? consequent : alternate`.
 * The part before `?` is parsed with `parseExprOps`. If that reports a pending
 * expression error, or no `?` follows, the parsed expression is returned as is.
 * @param {string} forInit - forwarded to `parseExprOps` and to the alternate branch
 * @param {*} refDestructuringErrors - forwarded to `parseExprOps` and `checkExpressionErrors`
 * @returns the ConditionalExpression node, or the plain expression when there is no `?`
 */
pp.parseMaybeConditional = function(forInit, refDestructuringErrors) {
  const beginPos = this.start;
  const beginLoc = this.startLoc;
  const testExpr = this.parseExprOps(forInit, refDestructuringErrors);

  // Short-circuit keeps the original order: `?` is only consumed when no
  // expression error is pending.
  if (this.checkExpressionErrors(refDestructuringErrors) || !this.eat(tt.question)) {
    return testExpr;
  }

  const conditional = this.startNodeAt(beginPos, beginLoc);
  conditional.test = testExpr;
  // The consequent is parsed without forInit; only the alternate receives it.
  conditional.consequent = this.parseMaybeAssign();
  this.expect(tt.colon);
  conditional.alternate = this.parseMaybeAssign(forInit);
  return this.finishNode(conditional, NodeTypes.ConditionalExpression);
};
ast结构为
json
{
"type": "ConditionalExpression",
"start": 0,
"end": 12,
"test": {
"type": "Identifier",
"start": 0,
"end": 4,
"name": "test"
},
"consequent": {
"type": "Literal",
"start": 7,
"end": 8,
"value": 1,
"raw": "1"
},
"alternate": {
"type": "Literal",
"start": 11,
"end": 12,
"value": 2,
"raw": "2"
}
}
parseExprOps
先调用 `parseMaybeUnary` 解析出单个变量 (一元表达式); 如果解析结果不是起始位置上的箭头函数, 再调用 `parseExprOp` 继续解析后面的运算表达式.
javascript
/**
 * Parse an operator expression; the first operand comes from `parseMaybeUnary`.
 * 1. Parse the first operand with `parseMaybeUnary`.
 * 2. If `checkExpressionErrors` reports a pending error, return the operand.
 * 3. If the operand is an ArrowFunctionExpression that begins at the start
 *    position, return it directly; otherwise hand it to `parseExprOp`
 *    (called with -1 as its fourth argument) to read the rest of the expression.
 * @param {string} forInit - forwarded to `parseMaybeUnary` and `parseExprOp`
 * @param {*} refDestructuringErrors - forwarded to `parseMaybeUnary` and `checkExpressionErrors`
 * @returns the parsed expression node
 */
pp.parseExprOps = function(forInit, refDestructuringErrors) {
  const beginPos = this.start;
  const beginLoc = this.startLoc;
  const operand = this.parseMaybeUnary(refDestructuringErrors, false, false, forInit);

  if (this.checkExpressionErrors(refDestructuringErrors)) {
    return operand;
  }

  const isLeadingArrow = operand.start === beginPos && operand.type === "ArrowFunctionExpression";
  if (isLeadingArrow) {
    return operand;
  }
  return this.parseExprOp(operand, beginPos, beginLoc, -1, forInit);
};
parseMaybeUnary
读出单个变量或者表达式, 如a, b.c, ccc[0], ddd(), ++a, a++, delete a[1], typeof a
javascript
/**
 * Parse a unary-level expression: one operand with optional prefix or postfix
 * operators, e.g. `a`, `b.c`, `ccc[0]`, `ddd()`, `++a`, `a++`, `delete a[1]`,
 * `typeof a`.
 *
 * 1. If the current token is the contextual keyword `await` and `this.canAwait`
 *    is set, delegate to `parseAwait`.
 * 2. If the current token is a prefix operator (`this.type.prefix`), create a
 *    node with `prefix: true` and parse its argument recursively.
 *    - `++`/`--` (`tt.incDec`): the argument is validated with
 *      `checkLValSimple` and the node becomes an UpdateExpression.
 *    - `delete` of a plain identifier in strict mode raises a recoverable error.
 *    - `delete` of a private field access raises a recoverable error.
 *    - any other prefix operator yields a UnaryExpression.
 * 3. If no unary operator has been seen and the token is a private identifier,
 *    parse it with `parsePrivateIdent`; it must be followed by `in`
 *    (as in `#x in obj`).
 * 4. Otherwise parse the operand with `parseExprSubscripts`, then wrap it in
 *    postfix UpdateExpression nodes (`prefix: false`) while a postfix operator
 *    follows and `canInsertSemicolon()` is false.
 * 5. Finally, unless parsing the argument of `++`/`--`, consume a following
 *    `**` and build the exponentiation node via `buildBinary`; `**` after a
 *    unary operator is reported through `unexpected`.
 *
 * @param {*} refDestructuringErrors - forwarded to `parseExprSubscripts` and `checkExpressionErrors`
 * @param {boolean} sawUnary - whether an enclosing call already consumed a unary operator
 * @param {boolean} incDec - whether this call parses the argument of a prefix `++`/`--`
 * @param {boolean} forInit - forwarded to the sub-parsers; a truthy value rejects a leading private identifier
 * @returns the parsed expression node
 */
pp.parseMaybeUnary = function(refDestructuringErrors, sawUnary, incDec, forInit) {
  const startPos = this.start;
  const startLoc = this.startLoc;
  let expr;

  if (this.isContextual("await") && this.canAwait) {
    // `await` is allowed here: let parseAwait handle it.
    expr = this.parseAwait(forInit);
    sawUnary = true;
  } else if (this.type.prefix) {
    // Prefix operator: `++`/`--`, `typeof`, `delete`, ...
    const node = this.startNode();
    const update = this.type === tt.incDec;
    node.operator = this.value;
    node.prefix = true;
    this.next();
    // Parse the operand the operator applies to.
    node.argument = this.parseMaybeUnary(null, true, update, forInit);
    this.checkExpressionErrors(refDestructuringErrors, true);
    if (update) {
      // `++`/`--` need a valid simple assignment target.
      this.checkLValSimple(node.argument);
    } else if (this.strict && node.operator === "delete" && node.argument.type === "Identifier") {
      // `delete name` is rejected in strict mode.
      this.raiseRecoverable(node.start, "Deleting local variable in strict mode");
    } else if (node.operator === "delete" && isPrivateFieldAccess(node.argument)) {
      // Private fields cannot be deleted.
      this.raiseRecoverable(node.start, "Private fields can not be deleted");
    } else {
      sawUnary = true;
    }
    expr = this.finishNode(node, update ? nt.UpdateExpression : nt.UnaryExpression);
  } else if (!sawUnary && this.type === tt.privateId) {
    if (forInit || this.privateNameStack.length === 0) this.unexpected();
    expr = this.parsePrivateIdent();
    // only could be private fields in 'in', such as #x in obj
    if (this.type !== tt._in) this.unexpected();
  } else {
    expr = this.parseExprSubscripts(refDestructuringErrors, forInit);
    // NOTE(review): checkExpressionErrors is defined elsewhere; a truthy result
    // here returns the operand before any postfix operator is consumed.
    if (this.checkExpressionErrors(refDestructuringErrors)) return expr;
    while (this.type.postfix && !this.canInsertSemicolon()) {
      const node = this.startNodeAt(startPos, startLoc);
      node.operator = this.value;
      node.prefix = false;
      node.argument = expr;
      this.checkLValSimple(expr);
      this.next();
      // Postfix `++`/`--` are update expressions (ESTree UpdateExpression with
      // `prefix: false`), consistent with the prefix branch above.
      expr = this.finishNode(node, nt.UpdateExpression);
    }
  }

  if (!incDec && this.eat(tt.starstar)) {
    if (sawUnary) {
      // `**` directly after a unary operator is not allowed.
      this.unexpected(this.lastTokStart);
    } else {
      return this.buildBinary(startPos, startLoc, expr, this.parseMaybeUnary(null, false, false, forInit), "**", false);
    }
  } else {
    return expr;
  }
};
parseExprSubscripts
解析调用链或者箭头函数, 主要在parseMaybeUnary中调用.
javascript
/**
 * Parse an atom together with its subscript chain; called from `parseMaybeUnary`.
 * 1. Parse the initial part of the expression with `parseExprAtom`.
 * 2. If it is an arrow function whose last consumed token text is not `)`,
 *    return it directly.
 * 3. Otherwise continue with `parseSubscripts`. When the result is a
 *    MemberExpression, reset the `parenthesizedAssign`, `parenthesizedBind`
 *    and `trailingComma` positions in `refDestructuringErrors` that are at or
 *    after the result's start.
 * @param {*} refDestructuringErrors - optional error-position record, forwarded to `parseExprAtom`
 * @param {boolean} forInit - forwarded to `parseExprAtom` and `parseSubscripts`
 * @returns the parsed expression node
 */
pp.parseExprSubscripts = function (refDestructuringErrors, forInit) {
  const beginPos = this.start;
  const beginLoc = this.startLoc;
  const atom = this.parseExprAtom(refDestructuringErrors, forInit);

  // Arrow function: only keep going when the last token was a closing paren.
  if (atom.type === NodeTypes.ArrowFunctionExpression) {
    const lastTokenText = this.input.slice(this.lastTokStart, this.lastTokEnd);
    if (lastTokenText !== ")") {
      return atom;
    }
  }

  const chain = this.parseSubscripts(atom, beginPos, beginLoc, false, forInit);
  if (!refDestructuringErrors || chain.type !== NodeTypes.MemberExpression) {
    return chain;
  }

  for (const field of ["parenthesizedAssign", "parenthesizedBind", "trailingComma"]) {
    if (refDestructuringErrors[field] >= chain.start) {
      refDestructuringErrors[field] = -1;
    }
  }
  return chain;
};
parseExprAtom
解析原子表达式
javascript
/**
 * Parse a single atomic expression by dispatching on the current token type:
 * `super`, `this`, an identifier (including `async function` and single-
 * parameter arrow functions), regexp / number / string literals,
 * `null` / `true` / `false`, a parenthesized expression, array and object
 * literals, `function` and `class` expressions, `new`, template literals and
 * `import` expressions. Any other token is reported through `unexpected()`.
 * @param {*} refDestructuringErrors - optional record; receives the start of a parenthesized expression in parenthesizedAssign / parenthesizedBind, and is forwarded to the array and object literal parsers
 * @param {*} forInit - forwarded to the function, arrow-function and parenthesized-expression parsers
 * @returns the parsed node
 */
pp.parseExprAtom = function(refDestructuringErrors, forInit) {
// If a division operator appears in an expression position, the
// tokenizer got confused, and we force it to read a regexp instead.
if (this.type === tt.slash) this.readRegexp()
// canBeArrow: the current token sits at the position recorded in potentialArrowAt.
let node, canBeArrow = this.potentialArrowAt === this.start
switch (this.type) {
case tt._super:
if (!this.allowSuper)
this.raise(this.start, "'super' keyword outside a method")
node = this.startNode()
this.next()
if (this.type === tt.parenL && !this.allowDirectSuper)
this.raise(node.start, "super() call outside constructor of a subclass")
// The `super` keyword can appear at below:
// SuperProperty:
// super [ Expression ]
// super . IdentifierName
// SuperCall:
// super ( Arguments )
if (this.type !== tt.dot && this.type !== tt.bracketL && this.type !== tt.parenL)
this.unexpected()
return this.finishNode(node, "Super")
case tt._this:
node = this.startNode()
this.next()
return this.finishNode(node, "ThisExpression")
case tt.name:
let startPos = this.start, startLoc = this.startLoc, containsEsc = this.containsEsc
let id = this.parseIdent(false)
// `async function ...` (ecmaVersion >= 8, `async` written without escapes):
// parse it as a function starting at the `async` identifier.
if (this.options.ecmaVersion >= 8 && !containsEsc && id.name === "async" && !this.canInsertSemicolon() && this.eat(tt._function)) {
this.overrideContext(tokenCtxTypes.f_expr)
return this.parseFunction(this.startNodeAt(startPos, startLoc), 0, false, true, forInit)
}
if (canBeArrow && !this.canInsertSemicolon()) {
// `id => ...`: arrow function with the identifier as its only parameter.
if (this.eat(tt.arrow))
return this.parseArrowExpression(this.startNodeAt(startPos, startLoc), [id], false, forInit)
// `async id => ...`: the next name becomes the parameter and `=>` must follow.
// The last condition skips this path for an unescaped `of` while
// potentialArrowInForAwait is set.
if (this.options.ecmaVersion >= 8 && id.name === "async" && this.type === tt.name && !containsEsc &&
(!this.potentialArrowInForAwait || this.value !== "of" || this.containsEsc)) {
id = this.parseIdent(false)
if (this.canInsertSemicolon() || !this.eat(tt.arrow))
this.unexpected()
return this.parseArrowExpression(this.startNodeAt(startPos, startLoc), [id], true, forInit)
}
}
// Plain identifier.
return id
case tt.regexp: // a regular-expression literal such as /reg/
let value = this.value
node = this.parseLiteral(value.value)
node.regex = {pattern: value.pattern, flags: value.flags}
return node
case tt.num: case tt.string:
return this.parseLiteral(this.value)
case tt._null: case tt._true: case tt._false:
node = this.startNode()
// value is null for `null`, otherwise the boolean `true`/`false`.
node.value = this.type === tt._null ? null : this.type === tt._true
node.raw = this.type.keyword
this.next()
return this.finishNode(node, nt.Literal)
case tt.parenL:
let start = this.start, expr = this.parseParenAndDistinguishExpression(canBeArrow, forInit)
if (refDestructuringErrors) {
// Record where the parenthesized expression started, unless a position
// is already stored (values < 0 mean "not set").
if (refDestructuringErrors.parenthesizedAssign < 0 && !this.isSimpleAssignTarget(expr))
refDestructuringErrors.parenthesizedAssign = start
if (refDestructuringErrors.parenthesizedBind < 0)
refDestructuringErrors.parenthesizedBind = start
}
return expr
case tt.bracketL: // [
node = this.startNode()
this.next()
node.elements = this.parseExprList(tt.bracketR, true, true, refDestructuringErrors)
return this.finishNode(node, nt.ArrayExpression)
case tt.braceL: // {
this.overrideContext(tokenCtxTypes.b_expr)
return this.parseObj(false, refDestructuringErrors)
case tt._function:
node = this.startNode()
this.next()
return this.parseFunction(node, 0)
case tt._class:
return this.parseClass(this.startNode(), false)
case tt._new:
return this.parseNew()
case tt.backQuote:
return this.parseTemplate()
case tt._import:
// `import` in expression position is only accepted from ecmaVersion 11 on.
if (this.options.ecmaVersion >= 11) {
return this.parseExprImport()
} else {
return this.unexpected()
}
default:
this.unexpected()
}
}
parseLiteral
解析字面量, 这里主要指数字/字符串; 正则字面量 (见 `parseExprAtom` 的 `tt.regexp` 分支) 也通过它创建节点, 以 `n` 结尾的 BigInt 字面量会额外设置 `bigint` 属性.
javascript
/**
 * Build a `Literal` node for the current token and advance to the next one.
 * `raw` is the source text between `this.start` and `this.end`. When that text
 * ends in `n`, the node also gets a `bigint` property holding the digits with
 * the trailing `n` and any `_` separators removed.
 * @param {*} value - the value to store on the node
 * @returns the finished Literal node
 */
pp.parseLiteral = function(value) {
  const literal = this.startNode();
  const rawText = this.input.slice(this.start, this.end);
  literal.value = value;
  literal.raw = rawText;

  if (rawText.endsWith("n")) {
    literal.bigint = rawText.slice(0, -1).replace(/_/g, "");
  }

  this.next();
  return this.finishNode(literal, "Literal");
};
综上
当我们输入 const num = 1
这么一个语句时, 解析的过程分别是:
1. `parseVarStatement`: 开始解析节点
2. `parseVar`: 开始解析等号左右
3. `parseVarId`: 开始解析等号左边
4. `parseBindingAtom`: 尝试解析单个表达式或变量
5. `parseIdent`: 解析单个变量, 获得 `Identifier` 节点
6. `parseMaybeAssign`: 开始解析等号右侧
7. `parseMaybeConditional`: 解析首节点
8. `parseExprOps`: 解析表达式
9. `parseMaybeUnary`: 解析可能是单变量的表达式
10. `parseExprSubscripts`: 解析子表达式
11. `parseExprAtom`: 解析原子表达式
12. `parseLiteral`: 解析字面量.
见微知著, 可以看出解析器的写法是在根据不同的条件去兼容解析不同的语句情况. 这里可以参考之前的语法分析笔记