vue3编译原理：ast树生成

文接上篇的第一步 `const ast = isString(source) ? baseParse(source, resolvedOptions) : source`，本文简单介绍vue3底层是如何用解析器将模板字符串转换成ast树的。

一、解析器

1、`baseParse`

js 复制代码

/**
 * Restores the parser's module-level bookkeeping so that the next parse
 * starts from a clean slate.
 */
function reset() {
  // Tokenizer state goes first, then the parser-side variables.
  tokenizer.reset();
  // No open tag and no attribute are in progress.
  currentOpenTag = currentProp = null;
  // Attribute value accumulated so far, and its start/end indices.
  currentAttrValue = "";
  currentAttrStartIndex = currentAttrEndIndex = -1;
  // Empty the element stack in place (the same array object is kept).
  stack.length = 0;
}
/**
 * Builds a new root node for the AST.
 *
 * @param children - child nodes to attach to the root
 * @param source - template text stored on the root; defaults to ""
 * @returns the root node, with empty bookkeeping collections and `loc`
 *   initialised to `locStub`
 */
function createRoot(children, source = "") {
  const root = {
    type: 0, // numeric node-type tag (presumably NodeTypes.ROOT — confirm against the enum)
    source,
    children,
    helpers: /* @__PURE__ */ new Set(),
    components: [],
    directives: [],
    hoists: [],
    imports: [],
    cached: 0,
    temps: 0,
    codegenNode: undefined,
    loc: locStub,
  };
  return root;
}
/**
 * Parses a template string into a root AST node.
 *
 * Steps: reset parser state, merge options over the defaults, create the
 * root, run the tokenizer over the input, then fix up the root's location
 * and condense whitespace in its children.
 *
 * @param input - the template source text
 * @param options - optional overrides for the default parser options;
 *   entries whose value is null/undefined are ignored
 * @returns the populated root node
 */
function baseParse(input, options) {
  reset();
  currentInput = input;

  // Start from a copy of the defaults, then layer the caller's overrides on top.
  currentOptions = extend({}, defaultParserOptions);
  if (options) {
    for (const key in options) {
      // A null/undefined override leaves the default in effect.
      if (options[key] == null) continue;
      currentOptions[key] = options[key];
    }
  }

  // The root is also published as `currentRoot` while the tokenizer runs.
  const root = createRoot([], input);
  currentRoot = root;

  // Key step: the tokenizer scans the template text.
  tokenizer.parse(currentInput);

  // The root's location spans the whole input.
  root.loc = getLoc(0, input.length);
  // Condense / filter whitespace among the root's children.
  root.children = condenseWhitespace(root.children);

  currentRoot = null;
  return root;
}

以上逻辑主要是重置解析条件reset，生成根节点createRoot，进行解析tokenizer.parse，过滤空白子元素condenseWhitespace。到这里不禁要问，tokenizer到底是啥？

Tokenizer是一个构造类：

2、`Tokenizer`

js 复制代码

  /**
   * Outline of the tokenizer class: the fields initialised by the constructor
   * and the names of its methods. Method bodies are elided in this outline.
   *
   * Note: class members are NOT separated by commas — a comma after a method
   * definition inside a class body is a syntax error.
   */
  class Tokenizer {
    /**
     * @param stack - the element stack handed in by the parser
     * @param cbs - object of callbacks the tokenizer invokes while scanning
     */
    constructor(stack, cbs) {
      this.stack = stack;
      this.cbs = cbs;
      this.state = 1; // initial state
      this.buffer = ""; // text being scanned (assigned in parse)
      this.sectionStart = 0; // start index of the section currently being read
      this.index = 0; // current scan position in the buffer
      this.newlines = []; // indices of newline characters seen so far
      this.mode = 0;
      this.delimiterOpen = defaultDelimitersOpen;
      this.delimiterClose = defaultDelimitersClose;
      this.delimiterIndex = -1;
      this.currentSequence = void 0;
      this.sequenceIndex = 0;
      // ...other properties omitted
    }
    // Methods (bodies omitted)
    reset() {}
    parse(input) {} // main scanning entry point
    getPos(index) {}
    peek() {}
    stateText() {}
    cleanup() {}
    finish() {}
    handleTagName(c) {}
    stateInTagName(c) {}
    handleAttrStart(c) {}
    stateInAttrName(c) {}
    stateInDirName(c) {}
    stateInDeclaration(c) {}
    // ...other methods omitted
  }

在创建Tokenizer时，传入了stack和各方法组成的对象{}

js 复制代码

// Create the tokenizer instance. The first argument is the parser's `stack`;
// the second is the callback object, which the constructor stores as `this.cbs`.
// Callback bodies are elided here; only their names and parameters are shown.
const tokenizer = new Tokenizer(stack, {
  onerr: emitError, // errors are forwarded to emitError
  ontext(start, end) {}, // invoked by stateText as ontext(sectionStart, index)
  ontextentity(char, start, end) {},
  oninterpolation(start, end) {},
  onopentagname(start, end) {}, // invoked by handleTagName as onopentagname(sectionStart, index)
  onopentagend(end) {}, // invoked by stateBeforeAttrName as onopentagend(index)
  onclosetag(start, end) {},
  onselfclosingtag(end) {},
  onattribname(start, end) {}, // invoked by stateInAttrName as onattribname(sectionStart, index)
  ondirname(start, end) {}, // invoked by handleAttrStart as ondirname(index, index + 1)
  ondirarg(start, end) {},
  ondirmodifier(start, end) {},
  onattribdata(start, end) {},
  onattribentity(char, start, end) {},
  onattribnameend(end) {},
  onattribend(quote, end) {},
  oncomment(start, end) {},
  onend() {},
  oncdata(start, end) {},
  onprocessinginstruction(start) {},
});

以上首先定义了一个Tokenizer类，其中包含属性和方法。再通过new Tokenizer的方式创建实例tokenizer，并传入stack栈和cbs回调方法集合。

接下来继续介绍核心逻辑：tokenizer.parse(currentInput)。

二、指针扫描过程

我们继续以下面template为例：

html 复制代码

<div class="myApp">
  <!-- 这是注释文案 -->
  <h3>编译原理</h3>
  <div v-if="flag">
    <p>{{ first + second }}</p>
  </div>
  <childComp v-else></childComp>
</div>

接下来看parse方法：

js 复制代码

/**
   * Iterates through the buffer, calling the function corresponding to the current state.
   *
   * States that are more likely to be hit are higher up, as a performance improvement.
   *
   * Each loop iteration handles exactly one character: the handler for the
   * current `this.state` is called with that character's code, and the handler
   * may change `this.state` (and `this.sectionStart`) for the next iteration.
   *
   * @param input - the text to scan; it is stored in `this.buffer`
   */
  public parse(input: string) {
    this.buffer = input
    while (this.index < this.buffer.length) {
      const c = this.buffer.charCodeAt(this.index)
      // Remember the index of every newline character.
      if (c === CharCodes.NewLine) {
        this.newlines.push(this.index)
      }
      // Dispatch on the current state; do not reorder the cases (see note above).
      switch (this.state) {
        case State.Text: {
          this.stateText(c)
          break
        }
        case State.InterpolationOpen: {
          this.stateInterpolationOpen(c)
          break
        }
        case State.Interpolation: {
          this.stateInterpolation(c)
          break
        }
        case State.InterpolationClose: {
          this.stateInterpolationClose(c)
          break
        }
        case State.SpecialStartSequence: {
          this.stateSpecialStartSequence(c)
          break
        }
        case State.InRCDATA: {
          this.stateInRCDATA(c)
          break
        }
        case State.CDATASequence: {
          this.stateCDATASequence(c)
          break
        }
        case State.InAttrValueDq: {
          this.stateInAttrValueDoubleQuotes(c)
          break
        }
        case State.InAttrName: {
          this.stateInAttrName(c)
          break
        }
        case State.InDirName: {
          this.stateInDirName(c)
          break
        }
        case State.InDirArg: {
          this.stateInDirArg(c)
          break
        }
        case State.InDirDynamicArg: {
          this.stateInDynamicDirArg(c)
          break
        }
        case State.InDirModifier: {
          this.stateInDirModifier(c)
          break
        }
        case State.InCommentLike: {
          this.stateInCommentLike(c)
          break
        }
        case State.InSpecialComment: {
          this.stateInSpecialComment(c)
          break
        }
        case State.BeforeAttrName: {
          this.stateBeforeAttrName(c)
          break
        }
        case State.InTagName: {
          this.stateInTagName(c)
          break
        }
        case State.InSFCRootTagName: {
          this.stateInSFCRootTagName(c)
          break
        }
        case State.InClosingTagName: {
          this.stateInClosingTagName(c)
          break
        }
        case State.BeforeTagName: {
          this.stateBeforeTagName(c)
          break
        }
        case State.AfterAttrName: {
          this.stateAfterAttrName(c)
          break
        }
        case State.InAttrValueSq: {
          this.stateInAttrValueSingleQuotes(c)
          break
        }
        case State.BeforeAttrValue: {
          this.stateBeforeAttrValue(c)
          break
        }
        case State.BeforeClosingTagName: {
          this.stateBeforeClosingTagName(c)
          break
        }
        case State.AfterClosingTagName: {
          this.stateAfterClosingTagName(c)
          break
        }
        case State.BeforeSpecialS: {
          this.stateBeforeSpecialS(c)
          break
        }
        case State.BeforeSpecialT: {
          this.stateBeforeSpecialT(c)
          break
        }
        case State.InAttrValueNq: {
          this.stateInAttrValueNoQuotes(c)
          break
        }
        case State.InSelfClosingTag: {
          this.stateInSelfClosingTag(c)
          break
        }
        case State.InDeclaration: {
          this.stateInDeclaration(c)
          break
        }
        case State.BeforeDeclaration: {
          this.stateBeforeDeclaration(c)
          break
        }
        case State.BeforeComment: {
          this.stateBeforeComment(c)
          break
        }
        case State.InProcessingInstruction: {
          this.stateInProcessingInstruction(c)
          break
        }
        case State.InEntity: {
          // Unlike the other handlers, this one takes no character argument.
          this.stateInEntity()
          break
        }
      }
      // Advance to the next character.
      this.index++
    }
    // End of input reached: run cleanup() and then finish() (bodies not shown here).
    this.cleanup()
    this.finish()
  }

可以看出这里根据state值产生了众多分支，这些分支处理了各种可能的场景。结合实例，一步步进行扫描，从字符串，变成ast树。

1、`State.Text`

在class Tokenizer内部第一行就定义了public state = State.Text（对应前文构造函数中的this.state = 1）。所以，扫描第一个字符<(c === 60)时，执行this.stateText(c)的分支逻辑：

我们定义如下指针，指针上带的属性值默认为：

c: 60（<）
state: State.Text
this.index: 0
this.sectionStart: 0

js 复制代码

  /**
   * Handles one character while the tokenizer is in the text state.
   *
   * - `CharCodes.Lt`: reports any text collected so far via `cbs.ontext`,
   *   then switches to `State.BeforeTagName` with the section starting here.
   * - `CharCodes.Amp` (non-browser builds only): starts entity handling.
   * - First character of the opening delimiter (unless `inVPre`): switches to
   *   `State.InterpolationOpen` and immediately processes the character there.
   *
   * Any other character leaves the state untouched.
   *
   * @param c - character code at the current index
   */
  private stateText(c: number): void {
    if (c === CharCodes.Lt) {
      // Only emit text when at least one character was collected.
      const hasPendingText = this.index > this.sectionStart
      if (hasPendingText) {
        this.cbs.ontext(this.sectionStart, this.index)
      }
      this.state = State.BeforeTagName
      this.sectionStart = this.index
      return
    }

    if (!__BROWSER__ && c === CharCodes.Amp) {
      this.startEntity()
      return
    }

    if (!this.inVPre && c === this.delimiterOpen[0]) {
      this.state = State.InterpolationOpen
      this.delimiterIndex = 0
      // Re-dispatch the same character under the new state.
      this.stateInterpolationOpen(c)
    }
  }

以上逻辑中，满足c === CharCodes.Lt，因此this.state = State.BeforeTagName，状态切换到下一个阶段：标签名开始之前（此时this.sectionStart = this.index = 0）。

2、`State.BeforeTagName`

指针变为：

c: 100(d)
state: State.BeforeTagName
this.index: 1
this.sectionStart: 0

js 复制代码

  /**
   * Handles the character that follows a `<`.
   *
   * Decides which state comes next: declaration, processing instruction,
   * tag name (several variants depending on mode), closing tag, or — when the
   * character fits none of these — back to text.
   *
   * @param c - character code at the current index
   */
  private stateBeforeTagName(c: number): void {
    if (c === CharCodes.ExclamationMark) {
      this.state = State.BeforeDeclaration
      this.sectionStart = this.index + 1
      return
    }

    if (c === CharCodes.Questionmark) {
      this.state = State.InProcessingInstruction
      this.sectionStart = this.index + 1
      return
    }

    if (isTagStartChar(c)) {
      // The tag name section begins at this character.
      this.sectionStart = this.index
      if (this.mode === ParseMode.BASE) {
        this.state = State.InTagName
      } else if (this.inSFCRoot) {
        this.state = State.InSFCRootTagName
      } else if (this.inXML) {
        this.state = State.InTagName
      } else if (c === 116 /* t */) {
        // Outside XML, names starting with "t" get their own state.
        this.state = State.BeforeSpecialT
      } else if (c === 115 /* s */) {
        // Likewise for names starting with "s".
        this.state = State.BeforeSpecialS
      } else {
        this.state = State.InTagName
      }
      return
    }

    if (c === CharCodes.Slash) {
      this.state = State.BeforeClosingTagName
      return
    }

    // Not a tag after all: fall back to text and let it handle this character.
    this.state = State.Text
    this.stateText(c)
  }

以上逻辑中，满足isTagStartChar(c)，因此this.sectionStart = this.index = 1，并且，this.state = State.InTagName，指针指向下一个阶段，标签tag中。

3、`State.InTagName`

指针变为：

c: 105(i)
state: State.InTagName
this.index: 2
this.sectionStart: 1

js 复制代码

  /**
   * Handles one character while inside an opening tag's name.
   * Nothing happens until a character that ends the tag-name section is
   * seen; that character is then passed to `handleTagName`.
   *
   * @param c - character code at the current index
   */
  private stateInTagName(c: number): void {
    if (!isEndOfTagSection(c)) {
      return
    }
    this.handleTagName(c)
  }

以上逻辑中，字符i不满足isEndOfTagSection(c)，所以指针继续扫描，直到扫描到空格位置，此时进入方法this.handleTagName(c)。

指针变为：

c: 32(空格)
state: State.InTagName
this.index: 4
this.sectionStart: 1

js 复制代码

  /**
   * Tokenizer callback (`this.cbs.onopentagname`): creates the element node
   * for the opening tag whose name spans `start`..`end` and stores it in
   * `currentOpenTag`.
   */
  onopentagname(start, end) {
    const tag = getSlice(start, end)
    const ns = currentOptions.getNamespace(tag, stack[0], currentOptions.ns)
    // The location starts one character before the name (the `<` in the
    // article's example).
    const loc = getLoc(start - 1, end)
    currentOpenTag = {
      type: NodeTypes.ELEMENT,
      tag,
      ns,
      tagType: ElementTypes.ELEMENT, // will be refined on tag close
      props: [],
      children: [],
      loc,
      codegenNode: undefined,
    }
  },
  /**
   * Finishes an opening tag's name once a terminating character is reached.
   * The statement order matters: the callback must see the name's span before
   * `sectionStart` is cleared, and the state must change before the same
   * character is re-dispatched.
   *
   * @param c - the character code that ended the tag name
   */
  private handleTagName(c: number) {
    // Report the tag name span (sectionStart up to, not including, index).
    this.cbs.onopentagname(this.sectionStart, this.index)
    // -1 presumably marks "no section in progress" — confirm against the other handlers.
    this.sectionStart = -1
    this.state = State.BeforeAttrName
    // Process the terminating character itself under the new state.
    this.stateBeforeAttrName(c)
  }

在以上逻辑中，我们得到的name就是div，currentOpenTag就是一个描述当前节点信息的对象。至此，我们的第一个标签div产生。结束之后，执行this.sectionStart = -1和this.state = State.BeforeAttrName，最后执行的this.stateBeforeAttrName(c)是为了用当前字符c在新状态下立即处理一次，例如判断开始标签是否到此结束（如<div>中紧跟标签名的>）。当前例子中c是空格，不满足任何分支，执行过程未有实际逻辑发生，我们继续移动指针。

4、`State.BeforeAttrName`

指针变为：

c: 99(c)
state: State.BeforeAttrName
this.index: 5
this.sectionStart: -1

js 复制代码

  /**
   * Handles one character while waiting for an attribute name inside an
   * opening tag.
   *
   * - `CharCodes.Gt`: the opening tag ends; notifies `cbs.onopentagend` and
   *   moves to `State.InRCDATA` or `State.Text`.
   * - `CharCodes.Slash`: moves to `State.InSelfClosingTag`, reporting an error
   *   (dev / non-browser builds) when the next character is not `Gt`.
   * - `CharCodes.Lt` followed by `Slash`: ends the open tag and moves to
   *   `State.BeforeTagName`.
   * - Whitespace: ignored.
   * - Anything else: starts an attribute via `handleAttrStart`, after
   *   reporting an error for a leading `CharCodes.Eq` (dev / non-browser builds).
   *
   * @param c - character code at the current index
   */
  private stateBeforeAttrName(c: number): void {
    if (c === CharCodes.Gt) {
      this.cbs.onopentagend(this.index)
      // inRCDATA is read after the callback, as in the original ordering.
      this.state = this.inRCDATA ? State.InRCDATA : State.Text
      this.sectionStart = this.index + 1
      return
    }

    if (c === CharCodes.Slash) {
      this.state = State.InSelfClosingTag
      if ((__DEV__ || !__BROWSER__) && this.peek() !== CharCodes.Gt) {
        this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
      }
      return
    }

    if (c === CharCodes.Lt && this.peek() === CharCodes.Slash) {
      this.cbs.onopentagend(this.index)
      this.state = State.BeforeTagName
      this.sectionStart = this.index
      return
    }

    if (isWhitespace(c)) {
      return
    }

    if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
      this.cbs.onerr(
        ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
        this.index,
      )
    }
    this.handleAttrStart(c)
  }

以上例子中字符c不是空白字符（也不是>、/等特殊字符），因此继续执行this.handleAttrStart(c)

js 复制代码

  /**
   * Classifies the first character of an attribute and picks the next state.
   *
   * - `CharCodes.LowerV` followed by `CharCodes.Dash` (e.g. the `v-` of
   *   `v-if`): directive name, section starts here.
   * - `Dot`, `Colon`, `At` or `Number`: the single character is reported as a
   *   directive name via `cbs.ondirname`, and the argument section starts at
   *   the next index.
   * - Otherwise: plain attribute name, section starts here.
   *
   * @param c - character code at the current index
   */
  private handleAttrStart(c: number) {
    if (c === CharCodes.LowerV && this.peek() === CharCodes.Dash) {
      this.state = State.InDirName
      this.sectionStart = this.index
      return
    }

    switch (c) {
      case CharCodes.Dot:
      case CharCodes.Colon:
      case CharCodes.At:
      case CharCodes.Number:
        this.cbs.ondirname(this.index, this.index + 1)
        this.state = State.InDirArg
        this.sectionStart = this.index + 1
        break
      default:
        this.state = State.InAttrName
        this.sectionStart = this.index
    }
  }

当前例子中，执行到了this.state = State.InAttrName和this.sectionStart = this.index。指针继续移动：

5、`State.InAttrName`

指针变为：

c: 108(l)
state: State.InAttrName
this.index: 6
this.sectionStart: 5

js 复制代码

  /**
   * Tokenizer callback (`this.cbs.onattribname`): creates the attribute node
   * for the name spanning `start`..`end` and stores it in `currentProp`.
   * `value` is left undefined at this point.
   */
  onattribname(start, end) {
    const name = getSlice(start, end)
    const nameLoc = getLoc(start, end)
    // Only the start is passed here; no end is supplied for the whole-attribute location.
    const loc = getLoc(start)
    currentProp = {
      type: NodeTypes.ATTRIBUTE,
      name,
      nameLoc,
      value: undefined,
      loc,
    }
  },
  /**
   * Handles one character while inside a plain attribute name.
   *
   * The name ends at `CharCodes.Eq` or at any end-of-tag-section character:
   * the name span is reported via `cbs.onattribname` and `handleAttrNameEnd`
   * takes over. In dev / non-browser builds, a double quote, single quote or
   * `Lt` inside the name is reported as an error; scanning then continues.
   *
   * @param c - character code at the current index
   */
  private stateInAttrName(c: number): void {
    if (c === CharCodes.Eq || isEndOfTagSection(c)) {
      this.cbs.onattribname(this.sectionStart, this.index)
      this.handleAttrNameEnd(c)
      return
    }

    if (
      (__DEV__ || !__BROWSER__) &&
      (c === CharCodes.DoubleQuote ||
        c === CharCodes.SingleQuote ||
        c === CharCodes.Lt)
    ) {
      this.cbs.onerr(
        ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        this.index,
      )
    }
  }

这里指针持续移动，直到满足c === CharCodes.Eq时，去执行this.cbs.onattribname(this.sectionStart, this.index)，获取了第一个描述属性的currentProp，这里暂时没有value值，需要我们继续移动指针。后面会扫描到例子中的myApp，这个步骤可以自行调试。

6、扫描至`>`时

指针变为：

c: 62(>)
state: State.BeforeAttrName
this.index: 18
this.sectionStart: -1

js 复制代码

/**
 * Appends `node` to the children of the element at `stack[0]`, or to the
 * children of `currentRoot` when `stack[0]` is falsy (empty stack).
 */
function addNode(node: TemplateChildNode) {
  const parent = stack[0] || currentRoot
  parent.children.push(node)
}
/**
 * Finalises the element held in `currentOpenTag` once its opening tag ends.
 *
 * The element is attached to its parent via `addNode`. Void tags are closed
 * immediately with `onCloseTag`; every other element is placed at the front
 * of `stack` so that following nodes become its children. `currentOpenTag`
 * is cleared at the end.
 *
 * @param end - index passed through to `getLoc` / `onCloseTag`
 */
function endOpenTag(end: number) {
  const el = currentOpenTag!

  if (tokenizer.inSFCRoot) {
    // Start with an empty inner location just past `end`.
    el.innerLoc = getLoc(end + 1, end + 1)
  }

  addNode(el)

  const { tag, ns } = el
  const entersPre = ns === Namespaces.HTML && currentOptions.isPreTag(tag)
  if (entersPre) {
    inPre++
  }

  if (currentOptions.isVoidTag(tag)) {
    onCloseTag(el, end)
  } else {
    stack.unshift(el)
    // SVG and MathML elements switch the tokenizer into XML mode.
    if (ns === Namespaces.SVG || ns === Namespaces.MATH_ML) {
      tokenizer.inXML = true
    }
  }

  currentOpenTag = null
}

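扫描到>时，stateBeforeAttrName命中c === CharCodes.Gt分支，调用this.cbs.onopentagend(this.index)，该回调内部会执行上面的endOpenTag。首先注意addNode，这里会将当前获取到的节点div对象currentOpenTag推入到栈顶元素stack[0]的children中去，如果栈为空，则推入到根节点currentRoot的children中去，实现了父子关系的建立。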

其次关注stack.unshift(currentOpenTag)，这里会将当前currentOpenTag通过unshift放到栈顶（即数组头部stack[0]），通过栈的方式维护树形结构；自闭合的void标签则不会入栈，而是直接执行onCloseTag。

三、图示解析过程

接下来按照指针扫描一行为单位，介绍树的构建和栈的维护。

以上针对解析器将字符串转换成ast树的过程，从指针逐个扫描字符和指针逐行扫描代码这两个尺度做了简单分析。字符的扫描过程中，遇到节点、文本、注释和插值则将其插入到栈顶元素（或根root）的children中去，遇到诸如<div ***>的起始标签会进行入栈操作，遇到诸如</div>的闭合标签则会进行出栈操作。通过以上操作，就将字符串转换成了ast树。

vue3编译原理：ast树生成

一、解析器

1、baseParse

2、Tokenizer

二、指针扫描过程

1、State.Text

2、State.BeforeTagName

3、State.InTagName

4、State.BeforeAttrName

5、State.InAttrName

6、扫描至>时

三、图示解析过程

1、`baseParse`

2、`Tokenizer`

1、`State.Text`

2、`State.BeforeTagName`

3、`State.InTagName`

4、`State.BeforeAttrName`

5、`State.InAttrName`

6、扫描至`>`时