补上篇, 继续写等式右侧的解析.

parseMaybeAssign

解析变量赋值中等号右边的表达式.

首先使用parseMaybeConditional解析出左侧节点, 然后判断当前token是否为赋值运算符(如=, +=, -=). 如果是, 则将operator设置为当前token的值, 把左侧节点设置为left, 再递归调用parseMaybeAssign解析运算符右边的表达式并设置为right, 最终返回AssignmentExpression节点; 如果不是, 则直接返回左侧节点.

javascript 复制代码

/**
 * Parses an assignment expression such as `a = b`, `a += b` or `x -= y`.
 * The left-hand side is parsed with `parseMaybeConditional`, so conditional
 * (`a ? b : c`) and operator expressions are accepted as well. When no
 * assignment operator follows, that left-hand node is returned as is.
 * @param {string | boolean} forInit Forwarded to the sub-parsers; compared against "await" when a potential arrow position is recorded.
 * @param {*} refDestructuringErrors Record of potential destructuring errors shared with the caller. The recursive call below omits it, so it is optional.
 * @param {Function} afterLeftParse Optional hook invoked as `afterLeftParse.call(this, left, startPos, startLoc)`; its return value replaces the left-hand node.
 * @returns The left-hand node, or an AssignmentExpression node when an assignment operator follows it.
 */
pp.parseMaybeAssign = function(forInit, refDestructuringErrors, afterLeftParse) {
  if (this.isContextual("yield")) {
    if (this.inGenerator) return this.parseYield(forInit)
    // The tokenizer will assume an expression is allowed after
    // `yield`, but this isn't that kind of yield
    else this.exprAllowed = false
  }

  // NOTE(review): the article elides code here. `ownDestructuringErrors`,
  // `oldParenAssign`, `oldTrailingComma` and `oldDoubleProto` used below are
  // not declared anywhere in the visible excerpt, so they are presumably set
  // up in the omitted part -- confirm against the full source.
  ...

  let startPos = this.start, startLoc = this.startLoc
  // Remember where an arrow function could start: only at `(` or at a name.
  if (this.type === tt.parenL || this.type === tt.name) {
    this.potentialArrowAt = this.start
    this.potentialArrowInForAwait = forInit === "await"
  }
  let left = this.parseMaybeConditional(forInit, refDestructuringErrors)
  if (afterLeftParse) left = afterLeftParse.call(this, left, startPos, startLoc)
  if (this.type.isAssign) {
    // The current token is an assignment operator: build the assignment node.
    let node = this.startNodeAt(startPos, startLoc)
    node.operator = this.value
    // Only a plain `=` converts the left-hand side with `toAssignable`.
    if (this.type === tt.eq)
      left = this.toAssignable(left, false, refDestructuringErrors)
    if (!ownDestructuringErrors) {
      refDestructuringErrors.parenthesizedAssign = refDestructuringErrors.trailingComma = refDestructuringErrors.doubleProto = -1
    }
    if (refDestructuringErrors.shorthandAssign >= left.start)
      refDestructuringErrors.shorthandAssign = -1 // reset because shorthand default was used correctly
    // `=` validates the target with `checkLValPattern`; every other
    // assignment operator uses `checkLValSimple`.
    if (this.type === tt.eq)
      this.checkLValPattern(left)
    else
      this.checkLValSimple(left)
    node.left = left
    this.next()
    // The right-hand side is itself an assignment expression, parsed recursively.
    node.right = this.parseMaybeAssign(forInit)
    if (oldDoubleProto > -1) refDestructuringErrors.doubleProto = oldDoubleProto
    return this.finishNode(node, NodeTypes.AssignmentExpression)
  } else {
    if (ownDestructuringErrors) this.checkExpressionErrors(refDestructuringErrors, true)
  }
  // Not an assignment: put back the saved values before returning the left-hand node.
  if (oldParenAssign > -1) refDestructuringErrors.parenthesizedAssign = oldParenAssign
  if (oldTrailingComma > -1) refDestructuringErrors.trailingComma = oldTrailingComma
  return left
}

parseMaybeConditional

解析三元表达式. 首先使用parseExprOps解析出条件部分(test); 如果接下来读到了?, 则递归调用parseMaybeAssign分别解析冒号两侧的两个分支(consequent和alternate), 并返回ConditionalExpression节点; 否则直接返回parseExprOps的解析结果.

javascript 复制代码

/**
 * Parses a conditional expression of the form `test ? consequent : alternate`.
 * The `test` part is parsed with `parseExprOps`; when it is not followed by
 * `?`, that node is returned unchanged.
 * @param {string} forInit Forwarded to `parseExprOps` and to the parse of the `alternate` branch (the `consequent` branch is parsed without it).
 * @param {*} refDestructuringErrors Forwarded to `parseExprOps` and `checkExpressionErrors`.
 * @returns The operator-expression node, or a ConditionalExpression node.
 */
pp.parseMaybeConditional = function(forInit, refDestructuringErrors) {
  const beginPos = this.start;
  const beginLoc = this.startLoc;
  const condition = this.parseExprOps(forInit, refDestructuringErrors);

  // Hand the node back untouched when an expression error is pending.
  if (this.checkExpressionErrors(refDestructuringErrors)) {
    return condition;
  }
  // No `?` follows: this is not a conditional expression.
  if (!this.eat(tt.question)) {
    return condition;
  }

  const conditional = this.startNodeAt(beginPos, beginLoc);
  conditional.test = condition; // everything before `?`
  conditional.consequent = this.parseMaybeAssign(); // branch between `?` and `:`
  this.expect(tt.colon);
  conditional.alternate = this.parseMaybeAssign(forInit); // branch after `:`
  return this.finishNode(conditional, NodeTypes.ConditionalExpression);
};

以 `test ? 1 : 2` 为例, 解析得到的AST结构为

json 复制代码

{
    "type": "ConditionalExpression",
    "start": 0,
    "end": 12,
    "test": {
      "type": "Identifier",
      "start": 0,
      "end": 4,
      "name": "test"
    },
    "consequent": {
      "type": "Literal",
      "start": 7,
      "end": 8,
      "value": 1,
      "raw": "1"
    },
    "alternate": {
      "type": "Literal",
      "start": 11,
      "end": 12,
      "value": 2,
      "raw": "2"
    }
}

parseExprOps

先调用parseMaybeUnary解析出第一个操作数; 如果它本身就是一个从起始位置开始的箭头函数, 则直接返回, 否则把它交给parseExprOp继续解析后面的运算符表达式.

javascript 复制代码

/**
 * Parses an operator expression; called from `parseMaybeConditional`.
 * 1. `parseMaybeUnary` parses the first operand.
 * 2. If `checkExpressionErrors` reports a pending error, the operand is
 *    returned as is.
 * 3. If the operand is an ArrowFunctionExpression that begins at the start
 *    position, it is returned directly; otherwise `parseExprOp` continues
 *    from that operand.
 * @param {string} forInit Forwarded to `parseMaybeUnary` and `parseExprOp`.
 * @param {*} refDestructuringErrors Forwarded to `parseMaybeUnary` and `checkExpressionErrors`.
 * @returns The operand itself or the node produced by `parseExprOp`.
 */
pp.parseExprOps = function(forInit, refDestructuringErrors) {
  const beginPos = this.start;
  const beginLoc = this.startLoc;
  const operand = this.parseMaybeUnary(refDestructuringErrors, false, false, forInit);

  if (this.checkExpressionErrors(refDestructuringErrors)) {
    return operand;
  }

  const isWholeArrowFunction =
    operand.start === beginPos && operand.type === "ArrowFunctionExpression";
  if (isWholeArrowFunction) {
    return operand;
  }
  // -1 is passed through to `parseExprOp` unchanged from the original call.
  return this.parseExprOp(operand, beginPos, beginLoc, -1, forInit);
};

parseMaybeUnary

读出单个变量或者表达式, 如a, b.c, ccc[0], ddd(), ++a, a++, delete a[1], typeof a

javascript 复制代码

/**
 * Parses a single operand together with any unary/update operators around
 * it, e.g. `a`, `b.c`, `ccc[0]`, `ddd()`, `++a`, `a++`, `delete a[1]`,
 * `typeof a`.
 * 1. If the current token is the contextual keyword `await` and awaiting is
 *    allowed, `parseAwait` parses the expression.
 * 2. If the current token is a prefix operator (`++`/`--`, `typeof`,
 *    `delete`, ...), a node with `prefix = true` is created and the operand
 *    is parsed by a recursive call.
 *    - `++`/`--`: the operand is validated with `checkLValSimple` and the
 *      node becomes an UpdateExpression.
 *    - `delete identifier` in strict mode raises a recoverable error.
 *    - `delete` on a private field access raises a recoverable error.
 *    - every other prefix operator yields a UnaryExpression.
 * 3. If no unary operator was seen and the token is a private identifier,
 *    `parsePrivateIdent` parses it; it must be followed by `in`.
 * 4. Otherwise `parseExprSubscripts` parses the operand, and every postfix
 *    `++`/`--` that follows wraps it in an UpdateExpression.
 * 5. Finally, unless this call parses the operand of `++`/`--`, a following
 *    `**` builds a binary node via `buildBinary`; `**` directly after a
 *    unary expression is rejected.
 * @param {*} refDestructuringErrors Forwarded to `parseExprSubscripts` and `checkExpressionErrors`.
 * @param {boolean} sawUnary True when the caller has already consumed a unary operator.
 * @param {boolean} incDec True when the caller is parsing the operand of a prefix `++`/`--`.
 * @param {boolean} forInit Whether parsing happens in the init part of a `for` loop.
 * @returns The parsed expression node.
 */
pp.parseMaybeUnary = function(refDestructuringErrors, sawUnary, incDec, forInit) {
  let startPos = this.start, startLoc = this.startLoc, expr
  if (this.isContextual("await") && this.canAwait) {
    // `await` is allowed here, so parse an await expression.
    expr = this.parseAwait(forInit)
    sawUnary = true
  } else if (this.type.prefix) {
    // Prefix operator: `++`/`--`, `typeof`, `delete`, ...
    let node = this.startNode(), update = this.type === tt.incDec
    node.operator = this.value
    node.prefix = true
    this.next()
    // Parse the operand the operator applies to.
    node.argument = this.parseMaybeUnary(null, true, update, forInit)
    this.checkExpressionErrors(refDestructuringErrors, true)
    // `++`/`--` needs a valid simple assignment target.
    if (update) this.checkLValSimple(node.argument)
    else if (this.strict && node.operator === "delete" &&
             node.argument.type === "Identifier")
      // `delete name` is an error in strict mode.
      this.raiseRecoverable(node.start, "Deleting local variable in strict mode")
    else if (node.operator === "delete" && isPrivateFieldAccess(node.argument))
      // Private fields cannot be deleted.
      this.raiseRecoverable(node.start, "Private fields can not be deleted")
    else sawUnary = true
    expr = this.finishNode(node, update ? nt.UpdateExpression : nt.UnaryExpression)
  } else if (!sawUnary && this.type === tt.privateId) {
    if (forInit || this.privateNameStack.length === 0) this.unexpected()
    // Parse the private identifier.
    expr = this.parsePrivateIdent()
    // only could be private fields in 'in', such as #x in obj
    if (this.type !== tt._in) this.unexpected()
  } else {
    expr = this.parseExprSubscripts(refDestructuringErrors, forInit)
    // NOTE(review): presumably `checkExpressionErrors` returns true when a
    // potential destructuring error has been recorded, in which case the
    // expression is handed back untouched so an outer caller can decide
    // whether it is really an error -- confirm against `checkExpressionErrors`.
    if (this.checkExpressionErrors(refDestructuringErrors)) return expr
    while (this.type.postfix && !this.canInsertSemicolon()) {
      // Postfix `++`/`--`
      let node = this.startNodeAt(startPos, startLoc)
      node.operator = this.value
      node.prefix = false
      node.argument = expr
      this.checkLValSimple(expr)
      this.next()
      // Postfix `++`/`--` is an UpdateExpression in ESTree, matching the
      // prefix branch above (it was previously finished as UnaryExpression).
      expr = this.finishNode(node, nt.UpdateExpression)
    }
  }

  if (!incDec && this.eat(tt.starstar)) {
    if (sawUnary)
      // `-a ** b` and similar forms are rejected.
      this.unexpected(this.lastTokStart)
    else
      return this.buildBinary(startPos, startLoc, expr, this.parseMaybeUnary(null, false, false, forInit), "**", false)
  } else {
    return expr
  }
}

parseExprSubscripts

解析调用链或者箭头函数, 主要在parseMaybeUnary中调用.

javascript 复制代码

/**
 * Parses an atom followed by its subscript chain, or an arrow function.
 * Mainly called from `parseMaybeUnary`; it is the default path for an
 * ordinary operand.
 * 1. `parseExprAtom` parses the leading atom.
 * 2. If the atom is an arrow function and the previous token is not `)`,
 *    the atom is returned directly.
 * 3. Otherwise `parseSubscripts` parses what follows the atom. When the
 *    result is a member expression, recorded `parenthesizedAssign`,
 *    `parenthesizedBind` and `trailingComma` positions that lie at or after
 *    the result's start are reset to -1.
 * @param {*} refDestructuringErrors Record of potential destructuring errors; may be falsy.
 * @param {boolean} forInit Forwarded to `parseExprAtom` and `parseSubscripts`.
 * @returns The atom itself or the node produced by `parseSubscripts`.
 */
pp.parseExprSubscripts = function (refDestructuringErrors, forInit) {
  const beginPos = this.start;
  const beginLoc = this.startLoc;
  const atom = this.parseExprAtom(refDestructuringErrors, forInit);

  // An arrow function whose last token is not `)` is returned as is.
  if (atom.type === NodeTypes.ArrowFunctionExpression) {
    const previousTokenText = this.input.slice(this.lastTokStart, this.lastTokEnd);
    if (previousTokenText !== ")") {
      return atom;
    }
  }

  const chain = this.parseSubscripts(atom, beginPos, beginLoc, false, forInit);
  if (!refDestructuringErrors || chain.type !== NodeTypes.MemberExpression) {
    return chain;
  }

  const positionFields = ["parenthesizedAssign", "parenthesizedBind", "trailingComma"];
  for (const field of positionFields) {
    if (refDestructuringErrors[field] >= chain.start) {
      refDestructuringErrors[field] = -1;
    }
  }
  return chain;
};

parseExprAtom

解析原子表达式

javascript 复制代码

/**
 * Parses an atomic expression by dispatching on the current token type:
 * `super`, `this`, an identifier (including `async function` and arrow
 * functions), a regular expression, number or string literal, `null` /
 * `true` / `false`, a parenthesized expression, an array or object literal,
 * a function or class expression, `new`, a template literal, or `import`.
 * Any other token type ends in `this.unexpected()`.
 * @param {*} refDestructuringErrors Record of potential destructuring errors; used by the parenthesized, array and object cases.
 * @param {string | boolean} forInit Forwarded to the function / arrow / parenthesized sub-parsers.
 * @returns The parsed node.
 */
pp.parseExprAtom = function(refDestructuringErrors, forInit) {
  // If a division operator appears in an expression position, the
  // tokenizer got confused, and we force it to read a regexp instead.
  if (this.type === tt.slash) this.readRegexp()
  // canBeArrow: the current token starts exactly at the recorded potential
  // arrow-function position.
  let node, canBeArrow = this.potentialArrowAt === this.start
  switch (this.type) {
  case tt._super:
    if (!this.allowSuper)
      this.raise(this.start, "'super' keyword outside a method")
    node = this.startNode()
    this.next()
    if (this.type === tt.parenL && !this.allowDirectSuper)
      this.raise(node.start, "super() call outside constructor of a subclass")
    // The `super` keyword can appear at below:
    // SuperProperty:
    //     super [ Expression ]
    //     super . IdentifierName
    // SuperCall:
    //     super ( Arguments )
    if (this.type !== tt.dot && this.type !== tt.bracketL && this.type !== tt.parenL)
      this.unexpected()
    return this.finishNode(node, "Super")

  case tt._this:
    node = this.startNode()
    this.next()
    return this.finishNode(node, "ThisExpression")

  case tt.name:
    // An identifier, which may also begin `async function ...`,
    // `id => ...` or `async id => ...`.
    let startPos = this.start, startLoc = this.startLoc, containsEsc = this.containsEsc
    let id = this.parseIdent(false)
    // `async function`: requires ecmaVersion >= 8, an unescaped `async`, and
    // no possible semicolon insertion between `async` and `function`.
    if (this.options.ecmaVersion >= 8 && !containsEsc && id.name === "async" && !this.canInsertSemicolon() && this.eat(tt._function)) {
      this.overrideContext(tokenCtxTypes.f_expr)
      return this.parseFunction(this.startNodeAt(startPos, startLoc), 0, false, true, forInit)
    }
    if (canBeArrow && !this.canInsertSemicolon()) {
      // `id => ...`
      if (this.eat(tt.arrow))
        return this.parseArrowExpression(this.startNodeAt(startPos, startLoc), [id], false, forInit)
      // `async id => ...`; skipped when this may be `for await (async of ...)`.
      if (this.options.ecmaVersion >= 8 && id.name === "async" && this.type === tt.name && !containsEsc &&
          (!this.potentialArrowInForAwait || this.value !== "of" || this.containsEsc)) {
        id = this.parseIdent(false)
        if (this.canInsertSemicolon() || !this.eat(tt.arrow))
          this.unexpected()
        return this.parseArrowExpression(this.startNodeAt(startPos, startLoc), [id], true, forInit)
      }
    }
    return id

  case tt.regexp: // a regular-expression literal such as /reg/
    let value = this.value
    node = this.parseLiteral(value.value)
    // Attach the pattern and flags of the regular expression to the literal.
    node.regex = {pattern: value.pattern, flags: value.flags}
    return node

  case tt.num: case tt.string:
    return this.parseLiteral(this.value)

  case tt._null: case tt._true: case tt._false:
    node = this.startNode()
    // `null` becomes null; otherwise the value is true for `true`, false for `false`.
    node.value = this.type === tt._null ? null : this.type === tt._true
    node.raw = this.type.keyword
    this.next()
    return this.finishNode(node, nt.Literal)

  case tt.parenL:
    let start = this.start, expr = this.parseParenAndDistinguishExpression(canBeArrow, forInit)
    if (refDestructuringErrors) {
      // Record where the parenthesized expression started, unless a position
      // has already been recorded.
      if (refDestructuringErrors.parenthesizedAssign < 0 && !this.isSimpleAssignTarget(expr))
        refDestructuringErrors.parenthesizedAssign = start
      if (refDestructuringErrors.parenthesizedBind < 0)
        refDestructuringErrors.parenthesizedBind = start
    }
    return expr

  case tt.bracketL: // [
    node = this.startNode()
    this.next()
    node.elements = this.parseExprList(tt.bracketR, true, true, refDestructuringErrors)
    return this.finishNode(node, nt.ArrayExpression)

  case tt.braceL: // {
    this.overrideContext(tokenCtxTypes.b_expr)
    return this.parseObj(false, refDestructuringErrors)

  case tt._function:
    node = this.startNode()
    this.next()
    return this.parseFunction(node, 0)

  case tt._class:
    return this.parseClass(this.startNode(), false)

  case tt._new:
    return this.parseNew()

  case tt.backQuote:
    return this.parseTemplate()

  case tt._import:
    // `import` in expression position is only parsed when ecmaVersion >= 11.
    if (this.options.ecmaVersion >= 11) {
      return this.parseExprImport()
    } else {
      return this.unexpected()
    }

  default:
    this.unexpected()
  }
}

parseLiteral

解析字面量, 这里主要指数字/字符串 (parseExprAtom在处理正则字面量时也会调用它); 如果字面量的原始文本以n结尾, 还会额外记录bigint字段.

javascript 复制代码

/**
 * Builds a Literal node for the current token and advances to the next one.
 * `raw` is the source text between `this.start` and `this.end`. When that
 * text ends with the character `n`, a `bigint` field is added holding the
 * text without the trailing `n` and without `_` separators.
 * @param {*} value The value to store on the node.
 * @returns The finished Literal node.
 */
pp.parseLiteral = function(value) {
  const LOWERCASE_N = 110; // char code of "n"
  const literal = this.startNode();
  literal.value = value;

  const rawText = this.input.slice(this.start, this.end);
  literal.raw = rawText;

  const lastCharCode = rawText.charCodeAt(rawText.length - 1);
  if (lastCharCode === LOWERCASE_N) {
    literal.bigint = rawText.slice(0, -1).replace(/_/g, "");
  }

  this.next();
  return this.finishNode(literal, "Literal");
};

综上

当我们输入 const num = 1这么一个语句时, 解析的过程分别是:

parseVarStatement开始解析节点
parseVar开始解析等号左右
parseVarId开始解析等号左边->parseBindingAtom尝试解析单个表达式或变量->parseIdent解析单个变量->获得Identifier节点
parseMaybeAssign开始解析等号右侧->parseMaybeConditional解析首节点->parseExprOps解析表达式->parseMaybeUnary解析可能是单变量的表达式->parseExprSubscripts解析子表达式->parseExprAtom解析原子表达式->parseLiteral解析字面量.

见微知著, 可以看出解析器的写法是在根据不同的条件去兼容解析不同的语句情况. 这里可以参考之前的语法分析笔记

编译原理笔记-源码学习-语句解析(变量赋值-等式右侧)

parseMaybeAssign

parseMaybeConditional

parseExprOps

parseMaybeUnary

parseExprSubscripts

parseExprAtom

parseLiteral

综上