文接上篇的第一步const ast = isString(source) ? baseParse(source, resolvedOptions) :source
,我们简单介绍vue3
底层是如何用解析器将字符串
转换成ast树
的。
一、解析器
1、baseParse
js
// Reset every piece of parser state before a new AST is generated.
function reset() {
  // Reset the tokenizer (lexer) first.
  tokenizer.reset();
  // No open tag and no attribute is being processed.
  currentOpenTag = currentProp = null;
  // The attribute value being collected starts out empty.
  currentAttrValue = "";
  // -1 marks "no start/end index recorded" for the attribute value.
  currentAttrStartIndex = currentAttrEndIndex = -1;
  // Empty the tag stack in place.
  stack.length = 0;
}
// Build the root node of the AST.
// `children` becomes the root's child list; `source` (default "") is the
// template string the root was created for.
function createRoot(children, source = "") {
  const rootNode = {
    type: 0,
    source,
    children,
    helpers: /* @__PURE__ */ new Set(),
    components: [],
    directives: [],
    hoists: [],
    imports: [],
    cached: 0,
    temps: 0,
    codegenNode: undefined,
    loc: locStub,
  };
  return rootNode;
}
// Parse a template string into a root AST node.
// `input` is the template source; `options` (optional) overrides entries of
// the default parser options.
function baseParse(input, options) {
  reset();

  // Remember the template currently being parsed.
  currentInput = input;

  // Start from a copy of the defaults, then apply every caller-supplied
  // option whose value is neither null nor undefined.
  currentOptions = extend({}, defaultParserOptions);
  if (options) {
    for (const key in options) {
      if (options[key] == null) continue;
      currentOptions[key] = options[key];
    }
  }

  // Create the root node and keep it as the current root while parsing.
  currentRoot = createRoot([], input);
  const root = currentRoot;

  // Key step: let the tokenizer scan the template.
  tokenizer.parse(currentInput);

  // The root location spans the whole input.
  root.loc = getLoc(0, input.length);
  // Condense / filter whitespace among the root's children.
  root.children = condenseWhitespace(root.children);

  // Parsing finished: drop the reference to the current root.
  currentRoot = null;
  return root;
}
以上逻辑主要是重置解析条件reset
,生成根节点createRoot
,进行解析tokenizer.parse
,过滤空白子元素condenseWhitespace
。到这里不禁要问,tokenizer
到底是啥?
Tokenizer
是一个构造类:
2、Tokenizer
js
// Outline of the Tokenizer class: constructor state plus method stubs.
// Method bodies are intentionally left empty in this overview.
class Tokenizer {
  // `stack` is the shared tag stack; `cbs` is the object of callbacks the
  // tokenizer invokes while scanning.
  constructor(stack, cbs) {
    this.stack = stack; // tag stack
    this.cbs = cbs; // callbacks
    this.state = 1;
    this.buffer = "";
    this.sectionStart = 0;
    this.index = 0;
    this.newlines = [];
    this.mode = 0;
    this.delimiterOpen = defaultDelimitersOpen;
    this.delimiterClose = defaultDelimitersClose;
    this.delimiterIndex = -1;
    this.currentSequence = void 0;
    this.sequenceIndex = 0;
    // ...more properties
  }
  // Methods (class members are NOT separated by commas).
  reset() {}
  parse(input) {} // scanning entry point
  getPos(index) {}
  peek() {}
  stateText() {}
  cleanup() {}
  finish() {}
  handleTagName(c) {}
  stateInTagName(c) {}
  handleAttrStart(c) {}
  stateInAttrName(c) {}
  stateInDirName(c) {}
  stateInDeclaration(c) {}
  // ...more methods
}
在创建Tokenizer
时,传入了stack
和各方法组成的对象{}
js
// Create the tokenizer instance. The first argument is the tag stack, the
// second is the callback object that the Tokenizer constructor stores as
// `cbs`. Callback bodies are left empty in this excerpt.
const tokenizer = new Tokenizer(stack, {
onerr: emitError,
ontext(start, end) {},
ontextentity(char, start, end) {},
oninterpolation(start, end) {},
onopentagname(start, end) {},
onopentagend(end) {},
onclosetag(start, end) {},
onselfclosingtag(end) {},
onattribname(start, end) {},
ondirname(start, end) {},
ondirarg(start, end) {},
ondirmodifier(start, end) {},
onattribdata(start, end) {},
onattribentity(char, start, end) {},
onattribnameend(end) {},
onattribend(quote, end) {},
oncomment(start, end) {},
onend() {},
oncdata(start, end) {},
onprocessinginstruction(start) {},
});
以上首先定义了一个Tokenizer
,其中包含属性和方法。再通过new Tokenizer
的方式创建实例tokenizer
,并传入stack
栈和cbs
方法集合。
接下来继续介绍核心逻辑:tokenizer.parse(currentInput)
。
二、指针扫描过程
我们继续以下面template
为例:
html
<div class="myApp">
<!-- 这是注释文案 -->
<h3>编译原理</h3>
<div v-if="flag">
<p>{{ first + second }}</p>
</div>
<childComp v-else></childComp>
</div>
接下来看parse
方法:
js
/**
* Iterates through the buffer, calling the function corresponding to the current state.
*
* States that are more likely to be hit are higher up, as a performance improvement.
*
* @param input - the string to scan; it is stored as `this.buffer`.
*/
public parse(input: string) {
this.buffer = input
// Scan one character code per iteration until the end of the buffer.
while (this.index < this.buffer.length) {
const c = this.buffer.charCodeAt(this.index)
// Record the index of every newline character.
if (c === CharCodes.NewLine) {
this.newlines.push(this.index)
}
// Dispatch the character to the handler of the current state.
switch (this.state) {
case State.Text: {
this.stateText(c)
break
}
case State.InterpolationOpen: {
this.stateInterpolationOpen(c)
break
}
case State.Interpolation: {
this.stateInterpolation(c)
break
}
case State.InterpolationClose: {
this.stateInterpolationClose(c)
break
}
case State.SpecialStartSequence: {
this.stateSpecialStartSequence(c)
break
}
case State.InRCDATA: {
this.stateInRCDATA(c)
break
}
case State.CDATASequence: {
this.stateCDATASequence(c)
break
}
case State.InAttrValueDq: {
this.stateInAttrValueDoubleQuotes(c)
break
}
case State.InAttrName: {
this.stateInAttrName(c)
break
}
case State.InDirName: {
this.stateInDirName(c)
break
}
case State.InDirArg: {
this.stateInDirArg(c)
break
}
case State.InDirDynamicArg: {
this.stateInDynamicDirArg(c)
break
}
case State.InDirModifier: {
this.stateInDirModifier(c)
break
}
case State.InCommentLike: {
this.stateInCommentLike(c)
break
}
case State.InSpecialComment: {
this.stateInSpecialComment(c)
break
}
case State.BeforeAttrName: {
this.stateBeforeAttrName(c)
break
}
case State.InTagName: {
this.stateInTagName(c)
break
}
case State.InSFCRootTagName: {
this.stateInSFCRootTagName(c)
break
}
case State.InClosingTagName: {
this.stateInClosingTagName(c)
break
}
case State.BeforeTagName: {
this.stateBeforeTagName(c)
break
}
case State.AfterAttrName: {
this.stateAfterAttrName(c)
break
}
case State.InAttrValueSq: {
this.stateInAttrValueSingleQuotes(c)
break
}
case State.BeforeAttrValue: {
this.stateBeforeAttrValue(c)
break
}
case State.BeforeClosingTagName: {
this.stateBeforeClosingTagName(c)
break
}
case State.AfterClosingTagName: {
this.stateAfterClosingTagName(c)
break
}
case State.BeforeSpecialS: {
this.stateBeforeSpecialS(c)
break
}
case State.BeforeSpecialT: {
this.stateBeforeSpecialT(c)
break
}
case State.InAttrValueNq: {
this.stateInAttrValueNoQuotes(c)
break
}
case State.InSelfClosingTag: {
this.stateInSelfClosingTag(c)
break
}
case State.InDeclaration: {
this.stateInDeclaration(c)
break
}
case State.BeforeDeclaration: {
this.stateBeforeDeclaration(c)
break
}
case State.BeforeComment: {
this.stateBeforeComment(c)
break
}
case State.InProcessingInstruction: {
this.stateInProcessingInstruction(c)
break
}
case State.InEntity: {
// Unlike the other handlers, this one is called without the character.
this.stateInEntity()
break
}
}
// Move the pointer to the next character.
this.index++
}
// The whole buffer has been scanned: run cleanup, then finish.
this.cleanup()
this.finish()
}
可以看出这里根据state
值产生了众多分支,这些分支处理了各种可能的场景。结合实例,一步步进行扫描,从字符串,变成ast
树。
1、State.Text
在class Tokenizer
内部第一行就定义了public state = State.Text
。所以,扫描的第一个字符<
(c === 60
)时,执行this.stateText(c)
的分支逻辑:
我们定义如下指针,指针上带的属性值默认为:
- c: 60(<)
- state: State.Text
- this.index: 0
- this.sectionStart: 0
js
/**
 * Handler for State.Text.
 *
 * @param c - char code at the current index.
 */
private stateText(c: number): void {
  // `<`: report any pending text, then wait for a tag name.
  if (c === CharCodes.Lt) {
    if (this.sectionStart < this.index) {
      this.cbs.ontext(this.sectionStart, this.index)
    }
    this.state = State.BeforeTagName
    this.sectionStart = this.index
    return
  }

  // `&` outside browser builds: start entity handling.
  if (!__BROWSER__ && c === CharCodes.Amp) {
    this.startEntity()
    return
  }

  // First char of the opening delimiter (skipped while inVPre is set):
  // switch to InterpolationOpen and let that handler see this char too.
  if (!this.inVPre && c === this.delimiterOpen[0]) {
    this.state = State.InterpolationOpen
    this.delimiterIndex = 0
    this.stateInterpolationOpen(c)
  }
}
以上逻辑中,满足c === CharCodes.Lt
,因此this.state = State.BeforeTagName
,指针指向下一个阶段,即标签名tag
开始之前。
2、State.BeforeTagName
指针变为:
- c: 100(d)
- state: State.BeforeTagName
- this.index: 1
- this.sectionStart: 0
js
// Handler for State.BeforeTagName: decides what follows based on the
// character `c` seen in this state.
private stateBeforeTagName(c: number): void {
if (c === CharCodes.ExclamationMark) {
// `!`: switch to BeforeDeclaration; the section starts after this char.
this.state = State.BeforeDeclaration
this.sectionStart = this.index + 1
} else if (c === CharCodes.Questionmark) {
// `?`: switch to InProcessingInstruction; the section starts after this char.
this.state = State.InProcessingInstruction
this.sectionStart = this.index + 1
} else if (isTagStartChar(c)) {
// A tag-start character: the tag name section begins at this index.
this.sectionStart = this.index
if (this.mode === ParseMode.BASE) {
this.state = State.InTagName
} else if (this.inSFCRoot) {
this.state = State.InSFCRootTagName
} else if (!this.inXML) {
// Outside XML, names starting with `t` or `s` go through the
// BeforeSpecialT / BeforeSpecialS states first.
if (c === 116 /* t */) {
this.state = State.BeforeSpecialT
} else {
this.state =
c === 115 /* s */ ? State.BeforeSpecialS : State.InTagName
}
} else {
this.state = State.InTagName
}
} else if (c === CharCodes.Slash) {
// `/`: a closing tag name is expected next.
this.state = State.BeforeClosingTagName
} else {
// Anything else: fall back to Text and let stateText handle this char.
this.state = State.Text
this.stateText(c)
}
}
以上逻辑中,满足isTagStartChar(c)
,因此this.sectionStart = this.index = 1
,并且,this.state = State.InTagName
,指针指向下一个阶段,标签tag
中。
3、State.InTagName
指针变为:
- c: 105(i)
- state: State.InTagName
- this.index: 2
- this.sectionStart: 1
js
/**
 * Handler for State.InTagName: keeps scanning until the tag-name section
 * ends, then hands the character to handleTagName.
 *
 * @param c - char code at the current index.
 */
private stateInTagName(c: number): void {
  // Still inside the tag name: nothing to do for this character.
  if (!isEndOfTagSection(c)) {
    return
  }
  this.handleTagName(c)
}
以上逻辑中,字符i
不满足isEndOfTagSection(c)
,所以指针继续扫描,直到扫描到空格位置,此时进入方法this.handleTagName(c)
。
指针变为:
- c: 32(空格)
- state: State.InTagName
- this.index: 4
- this.sectionStart: 1
js
// this.cbs.onopentagname
// Called with the [start, end) range of an opening tag's name; creates a
// new element node and stores it in currentOpenTag.
onopentagname(start, end) {
// The tag name text for the given range.
const name = getSlice(start, end)
currentOpenTag = {
type: NodeTypes.ELEMENT,
tag: name,
ns: currentOptions.getNamespace(name, stack[0], currentOptions.ns),
tagType: ElementTypes.ELEMENT, // will be refined on tag close
props: [],
children: [],
// The location starts one character before the name.
loc: getLoc(start - 1, end),
codegenNode: undefined,
}
},
// Called when the tag-name section ends at character `c`.
private handleTagName(c: number) {
// Report the tag name range [sectionStart, index).
this.cbs.onopentagname(this.sectionStart, this.index)
// No section is open until the next handler sets one.
this.sectionStart = -1
// Switch to BeforeAttrName and let that handler process the same character.
this.state = State.BeforeAttrName
this.stateBeforeAttrName(c)
}
在以上逻辑中,我们得到name
就是div
,currentOpenTag
就是一个丰富的描述当前节点信息的对象。至此,我们的第一个标签div
产生。结束之后,执行this.sectionStart = -1
和this.state = State.BeforeAttrName
,最后执行的this.stateBeforeAttrName(c)
是为了判断当前字符是否是开始标签的结束符>
,当前例子中字符为空格,不满足任何分支,执行过程未有实际逻辑发生,我们继续移动指针。
4、State.BeforeAttrName
指针变为:
- c: 99(c)
- state: State.BeforeAttrName
- this.index: 5
- this.sectionStart: -1
js
// Handler for State.BeforeAttrName: the tokenizer is inside an opening tag
// and no attribute name has started yet.
private stateBeforeAttrName(c: number): void {
if (c === CharCodes.Gt) {
// `>`: the opening tag ends here.
this.cbs.onopentagend(this.index)
// Continue in InRCDATA when inRCDATA is set, otherwise in Text.
if (this.inRCDATA) {
this.state = State.InRCDATA
} else {
this.state = State.Text
}
// The next section starts right after `>`.
this.sectionStart = this.index + 1
} else if (c === CharCodes.Slash) {
// `/`: switch to InSelfClosingTag.
this.state = State.InSelfClosingTag
// When __DEV__ is set or __BROWSER__ is not, report an error if the
// next character is not `>`.
if ((__DEV__ || !__BROWSER__) && this.peek() !== CharCodes.Gt) {
this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
}
} else if (c === CharCodes.Lt && this.peek() === CharCodes.Slash) {
// `</` inside an opening tag: end the open tag and go back to BeforeTagName.
this.cbs.onopentagend(this.index)
this.state = State.BeforeTagName
this.sectionStart = this.index
} else if (!isWhitespace(c)) {
// Any other non-whitespace character starts an attribute; a leading `=`
// is reported as an error first (same __DEV__ / __BROWSER__ condition).
if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
this.cbs.onerr(
ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
this.index,
)
}
this.handleAttrStart(c)
}
}
以上例子中字符c
不是空白字符,因此继续执行this.handleAttrStart(c)
js
/**
 * Picks the state for the first character of an attribute.
 *
 * @param c - char code at the current index.
 */
private handleAttrStart(c: number) {
  // `v` immediately followed by `-`: a directive name starts here.
  if (c === CharCodes.LowerV && this.peek() === CharCodes.Dash) {
    this.state = State.InDirName
    this.sectionStart = this.index
    return
  }

  switch (c) {
    // `.`, `:`, `@`, `#`: the single character is reported as the directive
    // name and the argument section starts right after it.
    case CharCodes.Dot:
    case CharCodes.Colon:
    case CharCodes.At:
    case CharCodes.Number:
      this.cbs.ondirname(this.index, this.index + 1)
      this.state = State.InDirArg
      this.sectionStart = this.index + 1
      break
    // Anything else starts a plain attribute name at this index.
    default:
      this.state = State.InAttrName
      this.sectionStart = this.index
  }
}
当前例子中,执行到了this.state = State.InAttrName
和this.sectionStart = this.index
。指针继续移动:
5、State.InAttrName
指针变为:
- c: 108(l)
- state: State.InAttrName
- this.index: 6
- this.sectionStart: 5
js
// this.cbs.onattribname
// Called with the [start, end) range of an attribute name; creates a new
// attribute node and stores it in currentProp.
onattribname(start, end) {
currentProp = {
type: NodeTypes.ATTRIBUTE,
name: getSlice(start, end),
nameLoc: getLoc(start, end),
// The value is not known yet at this point.
value: undefined,
// Only the start is passed to getLoc here.
loc: getLoc(start),
}
},
// Handler for State.InAttrName: scans an attribute name one character at a time.
private stateInAttrName(c: number): void {
if (c === CharCodes.Eq || isEndOfTagSection(c)) {
// `=` or an end-of-tag-section character ends the name: report the range
// [sectionStart, index), then let handleAttrNameEnd process this character.
this.cbs.onattribname(this.sectionStart, this.index)
this.handleAttrNameEnd(c)
} else if (
(__DEV__ || !__BROWSER__) &&
(c === CharCodes.DoubleQuote ||
c === CharCodes.SingleQuote ||
c === CharCodes.Lt)
) {
// A `"`, `'` or `<` inside the name is reported as an error when __DEV__
// is set or __BROWSER__ is not; the state is left unchanged.
this.cbs.onerr(
ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
this.index,
)
}
}
这里指针持续移动,直到满足c === CharCodes.Eq
时,去执行this.cbs.onattribname(this.sectionStart, this.index)
,获取了第一个描述属性的currentProp
,这里暂时没有value
值,需要我们继续移动指针。后面会扫描到例子中的myApp
,这个步骤可以自行调试。
6、扫描至>
时
指针变为:
- c: 62(>)
- state: State.BeforeAttrName
- this.index: 18
- this.sectionStart: -1
js
// addNode
// Append `node` to the children of the element at the front of the stack,
// or to the current root's children when the stack has no front element.
function addNode(node: TemplateChildNode) {
  const parent = stack[0] || currentRoot
  parent.children.push(node)
}
// endOpenTag
// Finishes the current opening tag: attaches the node to its parent and
// either closes it immediately (void tags) or pushes it onto the stack.
function endOpenTag(end: number) {
// While the tokenizer is in the SFC root, record an innerLoc positioned
// right after `end`.
if (tokenizer.inSFCRoot) {
currentOpenTag!.innerLoc = getLoc(end + 1, end + 1)
}
// Attach the node to its parent before the stack is modified below.
addNode(currentOpenTag!)
const { tag, ns } = currentOpenTag!
// Count HTML-namespace tags for which isPreTag returns true.
if (ns === Namespaces.HTML && currentOptions.isPreTag(tag)) {
inPre++
}
if (currentOptions.isVoidTag(tag)) {
// Void tags are closed right away and never pushed onto the stack.
onCloseTag(currentOpenTag!, end)
} else {
// Other tags go to the front of the stack, so stack[0] is this tag.
stack.unshift(currentOpenTag!)
// SVG / MathML namespaces switch the tokenizer into XML mode.
if (ns === Namespaces.SVG || ns === Namespaces.MATH_ML) {
tokenizer.inXML = true
}
}
// The opening tag is finished.
currentOpenTag = null
}
首先注意addNode
,这里会将当前获取到的节点div
对象currentOpenTag
推入到栈顶元素stack[0]
的children
中去,如果栈为空,则推入到根节点currentRoot
的children
中去,实现了父子关系的建立
。
其次关注stack.unshift(currentOpenTag)
,这里会将当前currentOpenTag
推入到栈中,通过栈的方式维护树形结构
。
三、图示解析过程
接下来按照指针扫描一行为单位,介绍树的构建和栈的维护。
以上介绍了解析器将字符串转换成ast树的过程,并从指针逐个扫描字符和指针逐行扫描代码这两个尺度做了简单分析。字符的扫描过程中,遇到节点、文本、注释和插值则将其插入到栈顶(或根root)的
children
中去,遇到诸如<div ***>
起始节点会进行入栈操作,遇到诸如</div>
闭合标签则会进行出栈操作。通过以上操作,就将字符串转换成了ast树
。