前言
之前写了几篇编译原理学习笔记, 中断了一段时间后重读以前的笔记, 才发现写得过于晦涩难懂, 有点影响阅读, 甚至连写作脉络都难以找到. 所以还是重开一个篇章, 以单条语句为切入点, 力争让读者在读完每一篇后都能理解对应的内容.
变量赋值
在几乎所有的编程入门教材中, 变量赋值都是继hello world之后第一个学习的语法, 以此作为切入点会是一个很好的开始.
源码
const num = 123; var str = 'string'
ast树
json
{
"type": "Program",
"start": 0,
"end": 35,
"body": [
{
"type": "VariableDeclaration",
"start": 0,
"end": 16,
"declarations": [
{
"type": "VariableDeclarator",
"start": 6,
"end": 15,
"id": {
"type": "Identifier",
"start": 6,
"end": 9,
"name": "num"
},
"init": {
"type": "Literal",
"start": 12,
"end": 15,
"value": 123,
"raw": "123"
}
}
],
"kind": "const"
},
{
"type": "VariableDeclaration",
"start": 17,
"end": 35,
"declarations": [
{
"type": "VariableDeclarator",
"start": 21,
"end": 35,
"id": {
"type": "Identifier",
"start": 21,
"end": 24,
"name": "str"
},
"init": {
"type": "Literal",
"start": 27,
"end": 35,
"value": "string",
"raw": "'string'"
}
}
],
"kind": "var"
}
],
"sourceType": "module"
}
入口
从下面的代码可以看出, 解析的重点就是循环调用 `parseStatement`, 直到 `this.type === tt.eof`, 此时就完成了所有语句的解析.
然后再检查未定义的导出(仅在 `inModule` 时), 并进行Directive处理.
javascript
/**
 * Entry point of the parse: reads statements until the end-of-file token and
 * finishes `node` as the Program node.
 * @param {Node} node Node to fill in; a `body` array is created when it has none.
 * @returns {Node} Whatever `finishNode` returns for the Program node.
 */
pp.parseTopLevel = function(node) {
  // Prototype-less object handed to every parseStatement call.
  const exports = Object.create(null)
  if (!node.body) {
    node.body = []
  }

  // Core loop: one statement per iteration until the current token is eof.
  while (this.type !== tt.eof) {
    const statement = this.parseStatement(null, true, exports)
    node.body.push(statement)
  }

  if (this.inModule) {
    // Every name still listed in undefinedExports is reported as an export without
    // a definition. (Per the original note, entries are added by checkLocalExport.)
    Object.keys(this.undefinedExports).forEach((name) => {
      this.raiseRecoverable(this.undefinedExports[name].start, `Export '${name}' is not defined`)
    })
  }

  this.adaptDirectivePrologue(node.body)
  this.next()
  node.sourceType = this.options.sourceType
  return this.finishNode(node, NodeTypes.Program)
}
parseVarStatement
在 `parseStatement` 中遇到 `const`, `let`, `var` 后, 调用 `parseVarStatement`, 并返回 `VariableDeclaration` 类型的节点.
javascript
/**
 * Parses a variable declaration statement such as `let a = 1`.
 * @param {Node} node Node that is completed as the declaration.
 * @param {string} kind Declaration keyword: `let`, `const` or `var`.
 * @returns {Node} Whatever `finishNode` returns for the VariableDeclaration node.
 */
pp.parseVarStatement = function(node, kind) {
  // Advance past the current token (presumably the declaration keyword itself).
  this.next()

  // parseVar reads the declarators, i.e. both sides of each `=`; `false` is its isFor flag.
  this.parseVar(node, false, kind)

  // Original note: try to insert a semicolon.
  this.semicolon()

  return this.finishNode(node, NodeTypes.VariableDeclaration)
}
parseVar
解析变量定义, 语句类似 `num = 123`, 也可以是 `num = 123, str = 'abc'` 这种连续写法. 在 `parseMaybeAssign` 的解析中, 兼容了 `num = n = 123` 这样的连续赋值写法.
javascript
/**
 * Parses a comma-separated list of declarators (`a = 1, b, [c] = d`) into
 * `node.declarations` and records the declaration kind on `node.kind`.
 * @param {Node} node Declaration node to fill in.
 * @param {boolean} isFor Forwarded to parseMaybeAssign; also permits an
 *   uninitialised non-identifier binding when followed by `in` / `of`.
 * @param {string} kind 'let', 'var' or 'const'.
 * @returns {Node} The same `node`.
 */
pp.parseVar = function(node, isFor, kind) {
  node.declarations = []
  node.kind = kind

  do {
    const declarator = this.startNode()
    this.parseVarId(declarator, kind)

    if (this.eat(tt.eq)) {
      declarator.init = this.parseMaybeAssign(isFor)
    } else if (
      kind === "const" &&
      this.type !== tt._in &&
      !(this.options.ecmaVersion >= 6 && this.isContextual("of"))
    ) {
      // A `const` binding without `=` is only tolerated when `in` (or, from
      // ecmaVersion 6, contextual `of`) follows.
      this.unexpected()
    } else if (
      declarator.id.type !== "Identifier" &&
      (!isFor || (this.type !== tt._in && !this.isContextual("of")))
    ) {
      // A non-identifier binding needs an initializer unless this is a for-head
      // followed by `in` / `of`.
      this.raise(this.lastTokEnd, "Complex binding patterns require an initialization value")
    } else {
      declarator.init = null
    }

    node.declarations.push(this.finishNode(declarator, NodeTypes.VariableDeclarator))
  } while (this.eat(tt.comma)) // another declarator follows each comma

  return node
}
等号左边表达式解读
parseVarId
读出变量名
javascript
/**
 * Reads the binding target of a declarator and stores it on `decl.id`.
 * @param {Node} decl Declarator node that receives the `id`.
 * @param {string} kind Declaration kind; "var" selects BIND_VAR, anything else BIND_LEXICAL.
 */
pp.parseVarId = function(decl, kind) {
  decl.id = this.parseBindingAtom()

  let bindingType
  if (kind === "var") {
    bindingType = BIND_VAR
  } else {
    bindingType = BIND_LEXICAL
  }

  // Validate the freshly parsed target with the binding type chosen above.
  this.checkLValPattern(decl.id, bindingType, false)
}
parseBindingAtom
解析单个变量或表达式
根据当前token区分不同情况(仅当 `ecmaVersion >= 6` 时): 如果遇到 `[`, 则调用 `parseBindingList` 解析为 `ArrayPattern` 节点; 如果遇到 `{`, 则调用 `parseObj` 解析; 否则默认调用 `parseIdent` 解析.
javascript
/**
 * Parses one binding target: for `let [a, b], {c, d}, e` this is each item
 * that follows `let`.
 * @returns {Node} An ArrayPattern node, the result of `parseObj(true)`, or the
 *   result of `parseIdent()`.
 */
pp.parseBindingAtom = function() {
  // The pattern forms are only recognised from ecmaVersion 6 upwards.
  if (this.options.ecmaVersion >= 6) {
    const tokenType = this.type

    if (tokenType === tt.bracketL) { // [a, b]
      const pattern = this.startNode()
      this.next()
      pattern.elements = this.parseBindingList(tt.bracketR, true, true)
      return this.finishNode(pattern, NodeTypes.ArrayPattern)
    }

    if (tokenType === tt.braceL) { // {c, d}
      return this.parseObj(true)
    }
  }

  return this.parseIdent() // e
}
parseBindingList
解析列表形式表达的变量, 依次调用 `parseMaybeDefault` 读出多个变量, 同时兼容 `...` Rest语法.
javascript
/**
 * Parses a comma-separated list of binding targets up to and including the
 * `close` token, as used for `let [a, b] = c` or `function (a, b) {}`.
 * @param {TokenType} close Token type that terminates the list.
 * @param {boolean} allowEmpty When truthy, an empty slot (`[a,,b]`) yields a `null` entry.
 * @param {boolean} allowTrailingComma When truthy, `close` may directly follow a comma (`[a,b,]`).
 * @returns {Array} The parsed element nodes, with `null` for empty slots.
 */
pp.parseBindingList = function(close, allowEmpty, allowTrailingComma) {
  const elements = []
  let needsComma = false

  for (;;) {
    if (this.eat(close)) break

    // Every element after the first must be preceded by a comma.
    if (needsComma) {
      this.expect(tt.comma)
    } else {
      needsComma = true
    }

    if (allowEmpty && this.type === tt.comma) {
      // Hole in the list, e.g. the middle slot of [a,,b].
      elements.push(null)
      continue
    }

    // [a,b,] — the closing token comes right after a comma.
    if (allowTrailingComma && this.afterTrailingComma(close)) break

    if (this.type === tt.ellipsis) {
      // `...rest` element; nothing but the closing token may follow it.
      const rest = this.parseRestBinding()
      this.parseBindingListItem(rest)
      elements.push(rest)
      if (this.type === tt.comma) {
        this.raise(this.start, "Comma is not permitted after the rest element")
      }
      this.expect(close)
      break
    }

    // Ordinary element, optionally with a default value: let [a = 1] = [2]
    const element = this.parseMaybeDefault(this.start, this.startLoc)
    this.parseBindingListItem(element)
    elements.push(element)
  }

  return elements
}
parseMaybeDefault
解析单个变量, 同时允许带默认值. 这里与 `parseBindingAtom` 组成了递归调用, 归根到底还是使用 `parseIdent` 解析.
javascript
/**
 * Parses one binding target and, when an `=` follows, wraps it together with
 * its default value in an AssignmentPattern node (`a = 1`).
 * @param {*} startPos Start position handed to `startNodeAt` for the wrapper node.
 * @param {*} startLoc Start location handed to `startNodeAt` for the wrapper node.
 * @param {Node} [left] Already parsed target; when falsy one is read via `parseBindingAtom`.
 * @returns {Node} The bare target, or the AssignmentPattern node wrapping it.
 */
pp.parseMaybeDefault = function(startPos, startLoc, left) {
  const target = left || this.parseBindingAtom()

  // Below ecmaVersion 6 the `=` is not even looked for.
  if (this.options.ecmaVersion < 6) return target
  if (!this.eat(tt.eq)) return target

  const pattern = this.startNodeAt(startPos, startLoc)
  pattern.left = target
  pattern.right = this.parseMaybeAssign()
  return this.finishNode(pattern, NodeTypes.AssignmentPattern)
}
parseRestBinding
解析 `...rest` 这样的rest语法, 与 `parseBindingAtom` 组成递归.
javascript
/**
 * Parses a rest element of a destructuring target, e.g. `let [...aaa] = bbb`.
 * @returns {Node} Whatever `finishNode` returns for the RestElement node.
 */
pp.parseRestBinding = function() {
  const rest = this.startNode()
  // Advance past the current token (presumably the `...`).
  this.next()

  // Original note: a RestElement inside of a function parameter must be an identifier.
  // The check applies only when ecmaVersion is exactly 6.
  const identifierOnly = this.options.ecmaVersion === 6
  if (identifierOnly && this.type !== tt.name) {
    this.unexpected()
  }

  rest.argument = this.parseBindingAtom()
  return this.finishNode(rest, NodeTypes.RestElement)
}
parseIdent
解析单个token并作为变量名返回, 这里不需要兼容任何情况, 是最小的子类型.
javascript
/**
 * Turns the current token into an Identifier node and advances to the next token.
 * A name token contributes its value; a keyword token contributes its keyword text;
 * any other token is reported through `unexpected()`.
 * @param {boolean} liberal When truthy, `true` is passed to `next()` and the
 *   reserved-word / `await` bookkeeping is skipped (original note: set when parsing properties).
 * @returns {Node} The Identifier node.
 */
pp.parseIdent = function(liberal) {
  const ident = this.startNode()

  if (this.type === tt.name) {
    ident.name = this.value
  } else if (this.type.keyword) {
    ident.name = this.type.keyword
    // To fix https://github.com/acornjs/acorn/issues/575
    // `class` and `function` keywords push new context into this.context.
    const pushedContext = ident.name === "class" || ident.name === "function"
    // Pop that context again unless the previous token is a single "." (char code 46),
    // i.e. the `xxx.class` form.
    if (pushedContext &&
        !(this.lastTokEnd === this.lastTokStart + 1 && this.input.charCodeAt(this.lastTokStart) === 46)) {
      this.context.pop()
    }
  } else {
    this.unexpected()
  }

  this.next(Boolean(liberal))
  this.finishNode(ident, "Identifier")
  if (liberal) return ident

  // Validate the name, then remember the position of an `await` identifier
  // while no (truthy) position has been recorded yet.
  this.checkUnreserved(ident)
  if (ident.name === "await" && !this.awaitIdentPos) {
    this.awaitIdentPos = ident.start
  }
  return ident
}
结果示例
json
{
"type": "Identifier",
"start": 21,
"end": 24,
"name": "str"
}