vue3编译原理:ast树生成

文接上篇的第一步const ast = isString(source) ? baseParse(source, resolvedOptions) :source,我们简单介绍vue3底层是如何用解析器将字符串转换成ast树的。

一、解析器

1、baseParse

js 复制代码
// Restore all module-level parser state so the next parse starts clean.
function reset() {
  tokenizer.reset(); // reset the tokenizer (lexer) as well
  // No open tag and no attribute is currently being assembled.
  currentOpenTag = currentProp = null;
  currentAttrValue = ""; // accumulated value of the current attribute
  // Start / end indices of the current attribute value.
  currentAttrStartIndex = currentAttrEndIndex = -1;
  stack.length = 0; // empty the tag stack in place
}
// Build the root node of the AST from its children and the template source.
function createRoot(children, source = "") {
  const root = {
    type: 0,
    source,
    children,
    helpers: /* @__PURE__ */ new Set(),
    components: [],
    directives: [],
    hoists: [],
    imports: [],
    cached: 0,
    temps: 0,
    codegenNode: undefined,
    // Placeholder location; baseParse overwrites it after tokenizing.
    loc: locStub,
  };
  return root;
}
// Parse a template string into a root AST node.
//   input   - the template source
//   options - optional overrides for the default parser options
function baseParse(input, options) {
  reset();
  // Remember the template currently being parsed.
  currentInput = input;
  // Begin with a copy of the defaults, then layer the caller's options on top.
  currentOptions = extend({}, defaultParserOptions);
  if (options) {
    for (const key in options) {
      const value = options[key];
      // null / undefined entries never override a default
      if (value == null) continue;
      currentOptions[key] = value;
    }
  }
  // Create the root node and expose it as the current root while parsing.
  const root = createRoot([], input);
  currentRoot = root;
  // Key step: run the tokenizer over the template.
  tokenizer.parse(currentInput);
  // The root's location spans the whole input.
  root.loc = getLoc(0, input.length);
  // Condense / filter whitespace among the root's children.
  root.children = condenseWhitespace(root.children);
  // Parsing is done; drop the module-level reference to the root.
  currentRoot = null;
  return root;
}

以上逻辑主要是重置解析条件reset,生成根节点createRoot,进行解析tokenizer.parse,过滤空白子元素condenseWhitespace。到这里不禁要问,tokenizer到底是啥?

Tokenizer是一个构造类:

2、Tokenizer

js 复制代码
  /**
   * Outline of the Tokenizer class. Method bodies and some members are
   * elided in this excerpt; only the shape of the class is shown.
   */
  class Tokenizer {
    /**
     * @param stack - the element stack shared with the parser
     * @param cbs - object of callbacks the tokenizer stores for later use
     */
    constructor(stack, cbs) {
      this.stack = stack; // element stack
      this.cbs = cbs; // callbacks
      this.state = 1; // initial state of the state machine
      this.buffer = ""; // the input being scanned
      this.sectionStart = 0; // start index of the section currently being read
      this.index = 0; // index of the character currently being scanned
      this.newlines = []; // indices of newline characters seen so far
      this.mode = 0;
      this.delimiterOpen = defaultDelimitersOpen;
      this.delimiterClose = defaultDelimitersClose;
      this.delimiterIndex = -1;
      this.currentSequence = void 0;
      this.sequenceIndex = 0;
      // ...other properties omitted
    }
    // Methods (bodies omitted). Note: class members are NOT separated by
    // commas; a comma after a method definition is a syntax error.
    reset() {}
    parse(input) {} // main entry point: scans the template
    getPos(index) {}
    peek() {}
    stateText() {}
    cleanup() {}
    finish() {}
    handleTagName(c) {}
    stateInTagName(c) {}
    handleAttrStart(c) {}
    stateInAttrName(c) {}
    stateInDirName(c) {}
    stateInDeclaration(c) {}
    // ...other methods omitted
  }

在创建Tokenizer时,传入了stack和各方法组成的对象{}

js 复制代码
// The tokenizer instance used by the parser. It is constructed with the
// shared `stack` and an object of callbacks (stored by the constructor as
// `cbs`). The handler bodies are elided in this excerpt; each handler
// receives index positions (start / end) into the input rather than
// substrings.
const tokenizer = new Tokenizer(stack, {
  onerr: emitError,
  ontext(start, end) {},
  ontextentity(char, start, end) {},
  oninterpolation(start, end) {},
  onopentagname(start, end) {},
  onopentagend(end) {},
  onclosetag(start, end) {},
  onselfclosingtag(end) {},
  onattribname(start, end) {},
  ondirname(start, end) {},
  ondirarg(start, end) {},
  ondirmodifier(start, end) {},
  onattribdata(start, end) {},
  onattribentity(char, start, end) {},
  onattribnameend(end) {},
  onattribend(quote, end) {},
  oncomment(start, end) {},
  onend() {},
  oncdata(start, end) {},
  onprocessinginstruction(start) {},
});

以上首先定义了一个Tokenizer类,其中包含属性和方法。再通过new Tokenizer的方式创建实例tokenizer,并传入stack栈和cbs回调方法集合。

接下来继续介绍核心逻辑:tokenizer.parse(currentInput)

二、指针扫描过程

我们继续以下面template为例:

html 复制代码
<div class="myApp">
  <!-- 这是注释文案 -->
  <h3>编译原理</h3>
  <div v-if="flag">
    <p>{{ first + second }}</p>
  </div>
  <childComp v-else></childComp>
</div>

接下来看parse方法:

js 复制代码
/**
   * Iterates through the buffer, calling the function corresponding to the current state.
   *
   * States that are more likely to be hit are higher up, as a performance improvement.
   * Do not reorder the cases without measuring.
   *
   * @param input - the template source; stored in `this.buffer` and scanned
   *   one character code at a time starting from `this.index`.
   */
  public parse(input: string) {
    this.buffer = input
    while (this.index < this.buffer.length) {
      const c = this.buffer.charCodeAt(this.index)
      // Record the offset of every newline character.
      // NOTE(review): presumably consumed by getPos to map offsets to line/column — confirm.
      if (c === CharCodes.NewLine) {
        this.newlines.push(this.index)
      }
      // Dispatch the current character to the handler of the current state.
      // A handler may change `this.state` (and `this.sectionStart`) for the
      // characters that follow.
      switch (this.state) {
        case State.Text: {
          this.stateText(c)
          break
        }
        case State.InterpolationOpen: {
          this.stateInterpolationOpen(c)
          break
        }
        case State.Interpolation: {
          this.stateInterpolation(c)
          break
        }
        case State.InterpolationClose: {
          this.stateInterpolationClose(c)
          break
        }
        case State.SpecialStartSequence: {
          this.stateSpecialStartSequence(c)
          break
        }
        case State.InRCDATA: {
          this.stateInRCDATA(c)
          break
        }
        case State.CDATASequence: {
          this.stateCDATASequence(c)
          break
        }
        case State.InAttrValueDq: {
          this.stateInAttrValueDoubleQuotes(c)
          break
        }
        case State.InAttrName: {
          this.stateInAttrName(c)
          break
        }
        case State.InDirName: {
          this.stateInDirName(c)
          break
        }
        case State.InDirArg: {
          this.stateInDirArg(c)
          break
        }
        case State.InDirDynamicArg: {
          this.stateInDynamicDirArg(c)
          break
        }
        case State.InDirModifier: {
          this.stateInDirModifier(c)
          break
        }
        case State.InCommentLike: {
          this.stateInCommentLike(c)
          break
        }
        case State.InSpecialComment: {
          this.stateInSpecialComment(c)
          break
        }
        case State.BeforeAttrName: {
          this.stateBeforeAttrName(c)
          break
        }
        case State.InTagName: {
          this.stateInTagName(c)
          break
        }
        case State.InSFCRootTagName: {
          this.stateInSFCRootTagName(c)
          break
        }
        case State.InClosingTagName: {
          this.stateInClosingTagName(c)
          break
        }
        case State.BeforeTagName: {
          this.stateBeforeTagName(c)
          break
        }
        case State.AfterAttrName: {
          this.stateAfterAttrName(c)
          break
        }
        case State.InAttrValueSq: {
          this.stateInAttrValueSingleQuotes(c)
          break
        }
        case State.BeforeAttrValue: {
          this.stateBeforeAttrValue(c)
          break
        }
        case State.BeforeClosingTagName: {
          this.stateBeforeClosingTagName(c)
          break
        }
        case State.AfterClosingTagName: {
          this.stateAfterClosingTagName(c)
          break
        }
        case State.BeforeSpecialS: {
          this.stateBeforeSpecialS(c)
          break
        }
        case State.BeforeSpecialT: {
          this.stateBeforeSpecialT(c)
          break
        }
        case State.InAttrValueNq: {
          this.stateInAttrValueNoQuotes(c)
          break
        }
        case State.InSelfClosingTag: {
          this.stateInSelfClosingTag(c)
          break
        }
        case State.InDeclaration: {
          this.stateInDeclaration(c)
          break
        }
        case State.BeforeDeclaration: {
          this.stateBeforeDeclaration(c)
          break
        }
        case State.BeforeComment: {
          this.stateBeforeComment(c)
          break
        }
        case State.InProcessingInstruction: {
          this.stateInProcessingInstruction(c)
          break
        }
        case State.InEntity: {
          // Unlike the other handlers, this one is called without the character.
          this.stateInEntity()
          break
        }
      }
      // Advance to the next character; handlers themselves do not advance here.
      this.index++
    }
    // End of input: run the end-of-buffer steps (bodies not shown in this excerpt).
    this.cleanup()
    this.finish()
  }

可以看出这里根据state值产生了众多分支,这些分支处理了各种可能的场景。结合实例,一步步进行扫描,从字符串,变成ast树。

1、State.Text

class Tokenizer内部第一行就定义了public state = State.Text。所以,扫描的第一个字符<(c === 60)时,执行this.stateText(c)的分支逻辑:

我们定义如下指针,指针上带的属性值默认为:

  • c: 60(<)
  • state: State.Text
  • this.index: 0
  • this.sectionStart: 0
js 复制代码
  /**
   * Handles one character while the tokenizer is in plain text.
   *
   * - `<` flushes any pending text and moves to BeforeTagName.
   * - `&` (non-browser builds only) starts entity handling.
   * - The first character of the opening delimiter (outside v-pre) starts
   *   interpolation matching.
   * Any other character is left in the current text section.
   *
   * @param c - character code at the current index
   */
  private stateText(c: number): void {
    if (c === CharCodes.Lt) {
      // Emit the text gathered so far, if there is any.
      const hasPendingText = this.index > this.sectionStart
      if (hasPendingText) {
        this.cbs.ontext(this.sectionStart, this.index)
      }
      this.state = State.BeforeTagName
      this.sectionStart = this.index
      return
    }

    if (!__BROWSER__ && c === CharCodes.Amp) {
      this.startEntity()
      return
    }

    if (!this.inVPre && c === this.delimiterOpen[0]) {
      this.state = State.InterpolationOpen
      this.delimiterIndex = 0
      // Let the new state process this same character.
      this.stateInterpolationOpen(c)
    }
  }

以上逻辑中,满足c === CharCodes.Lt,因此this.state = State.BeforeTagName,指针进入下一个阶段:标签名开始之前(BeforeTagName)。

2、State.BeforeTagName

指针变为:

  • c: 100(d)
  • state: State.BeforeTagName
  • this.index: 1
  • this.sectionStart: 0
js 复制代码
  /**
   * Handles the character that follows a `<`.
   *
   * Chooses between a declaration/comment (`!`), a processing instruction
   * (`?`), an opening tag name, a closing tag (`/`), or falls back to text
   * when the `<` did not start a tag.
   *
   * @param c - character code at the current index
   */
  private stateBeforeTagName(c: number): void {
    if (c === CharCodes.ExclamationMark) {
      this.state = State.BeforeDeclaration
      this.sectionStart = this.index + 1
      return
    }

    if (c === CharCodes.Questionmark) {
      this.state = State.InProcessingInstruction
      this.sectionStart = this.index + 1
      return
    }

    if (isTagStartChar(c)) {
      // The tag name section begins at this character.
      this.sectionStart = this.index
      let next = State.InTagName
      if (this.mode !== ParseMode.BASE) {
        if (this.inSFCRoot) {
          next = State.InSFCRootTagName
        } else if (!this.inXML) {
          // Names starting with `t` or `s` get dedicated states.
          if (c === 116 /* t */) {
            next = State.BeforeSpecialT
          } else if (c === 115 /* s */) {
            next = State.BeforeSpecialS
          }
        }
      }
      this.state = next
      return
    }

    if (c === CharCodes.Slash) {
      this.state = State.BeforeClosingTagName
      return
    }

    // Not a tag after all: treat this character as text.
    this.state = State.Text
    this.stateText(c)
  }

以上逻辑中,满足isTagStartChar(c),因此this.sectionStart = this.index = 1,并且,this.state = State.InTagName,指针指向下一个阶段,标签tag中。

3、State.InTagName

指针变为:

  • c: 105(i)
  • state: State.InTagName
  • this.index: 2
  • this.sectionStart: 1
js 复制代码
  /**
   * Stays in the tag name until a character that ends the tag section is
   * seen, then hands over to handleTagName.
   *
   * @param c - character code at the current index
   */
  private stateInTagName(c: number): void {
    if (!isEndOfTagSection(c)) {
      return
    }
    this.handleTagName(c)
  }

以上逻辑中,字符i不满足isEndOfTagSection(c),所以指针继续扫描,直到扫描到空格位置,此时进入方法this.handleTagName(c)

指针变为:

  • c: 32(空格)
  • state: State.InTagName
  • this.index: 4
  • this.sectionStart: 1
js 复制代码
  // this.cbs.onopentagname — creates the element node for the tag whose name
  // spans [start, end) and stores it in `currentOpenTag`.
  onopentagname(start, end) {
    const tagName = getSlice(start, end)
    const ns = currentOptions.getNamespace(tagName, stack[0], currentOptions.ns)
    currentOpenTag = {
      type: NodeTypes.ELEMENT,
      tag: tagName,
      ns,
      tagType: ElementTypes.ELEMENT, // will be refined on tag close
      props: [],
      children: [],
      // The location starts one character before the name.
      loc: getLoc(start - 1, end),
      codegenNode: undefined,
    }
  },
  /**
   * Finishes an opening tag's name.
   *
   * Reports the name span [sectionStart, index) through `onopentagname`,
   * clears the section start, switches to BeforeAttrName, and then feeds the
   * same character to that state's handler.
   *
   * @param c - the character code that ended the tag name
   */
  private handleTagName(c: number) {
    this.cbs.onopentagname(this.sectionStart, this.index)
    this.sectionStart = -1
    this.state = State.BeforeAttrName
    // Re-dispatch the terminating character so the new state can act on it
    // (e.g. a `>` right after the name ends the open tag immediately).
    this.stateBeforeAttrName(c)
  }

在以上逻辑中,我们得到的name就是div,currentOpenTag就是一个描述当前节点信息的对象。至此,我们的第一个标签div产生。结束之后,先后执行this.sectionStart = -1和this.state = State.BeforeAttrName,最后执行的this.stateBeforeAttrName(c)是为了用新状态处理当前这个结束标签名的字符,例如判断开始标签是否到此就结束了(当前字符为>,如<div>)。明显当前例子中不是(当前字符是空格),执行过程未有实际逻辑发生,我们继续移动指针。

4、State.BeforeAttrName

指针变为:

  • c: 99(c)
  • state: State.BeforeAttrName
  • this.index: 5
  • this.sectionStart: -1
js 复制代码
  /**
   * Handles one character between a tag name (or a finished attribute) and
   * the next attribute name.
   *
   * - `>` ends the open tag and returns to text (or RCDATA) scanning.
   * - `/` moves to the self-closing state.
   * - `<` followed by `/` ends the open tag and starts a new tag.
   * - Whitespace is skipped.
   * - Anything else starts an attribute.
   *
   * @param c - character code at the current index
   */
  private stateBeforeAttrName(c: number): void {
    if (c === CharCodes.Gt) {
      this.cbs.onopentagend(this.index)
      this.state = this.inRCDATA ? State.InRCDATA : State.Text
      // Content begins right after the `>`.
      this.sectionStart = this.index + 1
      return
    }

    if (c === CharCodes.Slash) {
      this.state = State.InSelfClosingTag
      // A `/` that is not immediately followed by `>` is reported as an error.
      if ((__DEV__ || !__BROWSER__) && this.peek() !== CharCodes.Gt) {
        this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
      }
      return
    }

    if (c === CharCodes.Lt && this.peek() === CharCodes.Slash) {
      this.cbs.onopentagend(this.index)
      this.state = State.BeforeTagName
      this.sectionStart = this.index
      return
    }

    if (isWhitespace(c)) {
      return
    }

    if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
      this.cbs.onerr(
        ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
        this.index,
      )
    }
    this.handleAttrStart(c)
  }

以上例子中字符c不是空白字符(也不是>、/或<),因此继续执行this.handleAttrStart(c):

js 复制代码
  /**
   * Decides how an attribute that begins at the current index is scanned.
   *
   * - `v` followed by `-` starts a directive name.
   * - A one-character shorthand prefix (Dot, Colon, At, Number) is reported
   *   as the directive name, and scanning continues with its argument.
   * - Anything else starts a plain attribute name.
   *
   * @param c - character code at the current index
   */
  private handleAttrStart(c: number) {
    if (c === CharCodes.LowerV && this.peek() === CharCodes.Dash) {
      this.state = State.InDirName
      this.sectionStart = this.index
      return
    }

    switch (c) {
      case CharCodes.Dot:
      case CharCodes.Colon:
      case CharCodes.At:
      case CharCodes.Number:
        // The single prefix character is the directive name itself.
        this.cbs.ondirname(this.index, this.index + 1)
        this.state = State.InDirArg
        this.sectionStart = this.index + 1
        break
      default:
        this.state = State.InAttrName
        this.sectionStart = this.index
    }
  }

当前例子中,执行到了最后的else分支,即this.state = State.InAttrName和this.sectionStart = this.index。指针继续移动:

5、State.InAttrName

指针变为:

  • c: 108(l)
  • state: State.InAttrName
  • this.index: 6
  • this.sectionStart: 5
js 复制代码
  // this.cbs.onattribname — creates the attribute node for the name spanning
  // [start, end) and stores it in `currentProp`.
  onattribname(start, end) {
    const name = getSlice(start, end)
    const nameLoc = getLoc(start, end)
    currentProp = {
      type: NodeTypes.ATTRIBUTE,
      name,
      nameLoc,
      value: undefined, // no value has been read at this point
      loc: getLoc(start),
    }
  },
  /**
   * Handles one character while inside a plain attribute name.
   *
   * The name ends at `=` or at any end-of-tag-section character; the name
   * span is then reported and handleAttrNameEnd takes over. In dev or
   * non-browser builds, a quote or `<` inside the name is reported as an error.
   *
   * @param c - character code at the current index
   */
  private stateInAttrName(c: number): void {
    const nameEnded = c === CharCodes.Eq || isEndOfTagSection(c)
    if (nameEnded) {
      this.cbs.onattribname(this.sectionStart, this.index)
      this.handleAttrNameEnd(c)
      return
    }

    if (
      (__DEV__ || !__BROWSER__) &&
      (c === CharCodes.DoubleQuote ||
        c === CharCodes.SingleQuote ||
        c === CharCodes.Lt)
    ) {
      this.cbs.onerr(
        ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        this.index,
      )
    }
  }

这里指针持续移动,直到满足c === CharCodes.Eq时,去执行this.cbs.onattribname(this.sectionStart, this.index),获取了第一个描述属性的currentProp,这里暂时没有value值,需要我们继续移动指针。后面会扫描到例子中的myApp,这个步骤可以自行调试。

6、扫描至>

指针变为:

  • c: 62(>)
  • state: State.BeforeAttrName
  • this.index: 18
  • this.sectionStart: -1
js 复制代码
// addNode — appends `node` to the children of the element at the front of
// the stack, or to the root's children when the stack is empty.
function addNode(node: TemplateChildNode) {
  const parent = stack[0] || currentRoot
  parent.children.push(node)
}
// endOpenTag — called when an opening tag is complete. Attaches the element
// to its parent, then either closes it right away (void tags) or pushes it
// onto the stack so later nodes become its children.
function endOpenTag(end: number) {
  const el = currentOpenTag!
  if (tokenizer.inSFCRoot) {
    // Empty location placed just after the end of the open tag.
    el.innerLoc = getLoc(end + 1, end + 1)
  }
  addNode(el)

  const { tag, ns } = el
  if (ns === Namespaces.HTML && currentOptions.isPreTag(tag)) {
    inPre++
  }

  if (currentOptions.isVoidTag(tag)) {
    onCloseTag(el, end)
  } else {
    // stack[0] is the innermost open element.
    stack.unshift(el)
    const entersXML = ns === Namespaces.SVG || ns === Namespaces.MATH_ML
    if (entersXML) {
      tokenizer.inXML = true
    }
  }
  currentOpenTag = null
}

首先注意addNode,这里会将当前获取到的节点div对象currentOpenTag推入到栈顶元素stack[0]的children中去;如果栈为空,则推入到根节点currentRoot的children中去,实现了父子关系的建立。

其次关注stack.unshift(currentOpenTag),这里会将当前currentOpenTag推入到栈中,通过栈的方式维护树形结构

三、图示解析过程

接下来按照指针扫描一行为单位,介绍树的构建和栈的维护。

以上介绍了解析器将字符串转换成ast树的过程,并从“指针逐个扫描字符”和“指针逐行扫描代码”这两个尺度做了简单分析。字符的扫描过程中,遇到节点、文本、注释和插值则将其插入到栈顶(或根root)的children中去,遇到诸如<div ***>的起始标签会进行入栈操作,遇到诸如</div>的闭合标签则会进行出栈操作。通过以上操作,就将字符串转换成了ast树。

相关推荐
夏花里的尘埃28 分钟前
vue3实现echarts——小demo
前端·vue.js·echarts
努力学习的木子1 小时前
uniapp如何隐藏默认的页面头部导航栏,uniapp开发小程序如何隐藏默认的页面头部导航栏
前端·小程序·uni-app
java小郭4 小时前
html的浮动作用详解
前端·html
水星记_4 小时前
echarts-wordcloud:打造个性化词云库
前端·vue
强迫老板HelloWord5 小时前
前端JS特效第22波:jQuery滑动手风琴内容切换特效
前端·javascript·jquery
luanluan88886 小时前
维护el-table列,循环生成el-table
javascript·vue.js·ecmascript·element plus
续亮~6 小时前
9、程序化创意
前端·javascript·人工智能
RainbowFish7 小时前
「Vue学习之路」—— vue的常用指令
前端·vue.js
Wang's Blog7 小时前
Webpack: 三种Chunk产物的打包逻辑
前端·webpack·node.js
pan_junbiao7 小时前
HTML5使用<blockquote>标签:段落缩进
前端·html·html5