PostgreSQL源码分析—— ValueScan

本文以select * from (values (1,1),(2,2)) as foo;为例，分析一下ValueScan其源码执行过程。

语法解析层

Select SQL语句字符串输入到数据库后，会首先在语法解析层表示成抽象语法树SelectStmt，进而经过语义分析，转换为查询树Query。将values的信息保存在了valuesLists字段中。

c 复制代码

exec_simple_query
--> pg_parse_query
    --> raw_parser
        --> base_yyparse
--> pg_analyze_and_rewrite

核心结构体

c 复制代码

typedef struct SelectStmt
{
	NodeTag		type;

	/* These fields are used only in "leaf" SelectStmts.*/
	List	   *distinctClause; /* NULL, list of DISTINCT ON exprs, or
								 * lcons(NIL,NIL) for all (SELECT DISTINCT) */
	IntoClause *intoClause;		/* target for SELECT INTO */
	List	   *targetList;		/* the target list (of ResTarget) */
	List	   *fromClause;		/* the FROM clause */
	Node	   *whereClause;	/* WHERE qualification */
	List	   *groupClause;	/* GROUP BY clauses */
	Node	   *havingClause;	/* HAVING conditional-expression */
	List	   *windowClause;	/* WINDOW window_name AS (...), ... */

	/*
	 * In a "leaf" node representing a VALUES list, the above fields are all
	 * null, and instead this field is set.  Note that the elements of the
	 * sublists are just expressions, without ResTarget decoration. Also note
	 * that a list element can be DEFAULT (represented as a SetToDefault
	 * node), regardless of the context of the VALUES list. It's up to parse
	 * analysis to reject that where not valid.
	 */
	List	   *valuesLists;	/* untransformed list of expression lists */

	/* These fields are used in both "leaf" SelectStmts and upper-level SelectStmts.*/
	List	   *sortClause;		/* sort clause (a list of SortBy's) */
	Node	   *limitOffset;	/* # of result tuples to skip */
	Node	   *limitCount;		/* # of result tuples to return */
	LimitOption limitOption;	/* limit type */
	List	   *lockingClause;	/* FOR UPDATE (list of LockingClause's) */
	WithClause *withClause;		/* WITH clause */

	/* These fields are used only in upper-level SelectStmts.*/
	SetOperation op;			/* type of set op */
	bool		all;			/* ALL specified? */
	struct SelectStmt *larg;	/* left child */
	struct SelectStmt *rarg;	/* right child */
} SelectStmt;

/* RangeSubselect - subquery appearing in a FROM clause */
typedef struct RangeSubselect
{
	NodeTag		type;
	bool		lateral;		/* does it have LATERAL prefix? */
	Node	   *subquery;		/* the untransformed sub-select clause */
	Alias	   *alias;			/* table alias & optional column aliases */
} RangeSubselect;

gram.y中语法定义分析：

sql 复制代码

-- 表示的 from () as foo
from_clause:
			FROM from_list							{ $$ = $2; }
			| /*EMPTY*/								{ $$ = NIL; }
		;

from_list:
			table_ref								{ $$ = list_make1($1); }
			| from_list ',' table_ref				{ $$ = lappend($1, $3); }
		;

/*
 * table_ref is where an alias clause can be attached.
 */
table_ref:	relation_expr opt_alias_clause
				{
					$1->alias = $2;
					$$ = (Node *) $1;
				}
			| select_with_parens opt_alias_clause
				{   -- 将values 表示为了子查询 
					RangeSubselect *n = makeNode(RangeSubselect);
					n->lateral = false;
					n->subquery = $1;
					n->alias = $2;

					if ($2 == NULL)
					{
						if (IsA($1, SelectStmt) &&
							((SelectStmt *) $1)->valuesLists)
							ereport(ERROR,
									(errcode(ERRCODE_SYNTAX_ERROR),
									 errmsg("VALUES in FROM must have an alias"),
									 errhint("For example, FROM (VALUES ...) [AS] foo."),
									 parser_errposition(@1)));
					}
					$$ = (Node *) n;
				}

select_with_parens:
			'(' select_no_parens ')'				{ $$ = $2; }

select_no_parens:
			simple_select						{ $$ = $1; }
        
simple_select:
			SELECT opt_all_clause opt_target_list
			into_clause from_clause where_clause
			group_clause having_clause window_clause
				{
					SelectStmt *n = makeNode(SelectStmt);
					n->targetList = $3;
					n->intoClause = $4;
					n->fromClause = $5;
					n->whereClause = $6;
					n->groupClause = ($7)->list;
					n->havingClause = $8;
					n->windowClause = $9;
					$$ = (Node *)n;
				}
    		| values_clause							{ $$ = $1; }

-- 表示 values ( (1,1), (2,2))
values_clause:
			VALUES '(' expr_list ')'
				{
					SelectStmt *n = makeNode(SelectStmt);
					n->valuesLists = list_make1($3);
					$$ = (Node *) n;
				}
			| values_clause ',' '(' expr_list ')'
				{
					SelectStmt *n = (SelectStmt *) $1;
					n->valuesLists = lappend(n->valuesLists, $4);
					$$ = (Node *) n;
				}
		;

最终表示为：

c 复制代码

// select * from Subquery,  Subquery is (Values (1,1),(2,2)) as foo;
SelectStmt  (top-sql)
{
    targetList :  '*'
    fromClause :  RangeSubselect
}
// 子查询范围表
RangeSubselect
{
    subquery:  SelectStmt  (Subquery is (Values (1,1),(2,2)))
    alias:    as foo
}
// 子查询
SelectStmt
{
    valuesLists:   `(Values (1,1),(2,2))`
}

语义分析

将上面的抽象语法树SelectStmt转换为查询树Query。检查语义是否合法。流程如下：

c 复制代码

pg_analyze_and_rewrite
--> pg_analyze
    --> transformStmt
        --> transformSelectStmt
            --> transformFromClause  // 处理子查询范围表
                --> transformFromClauseItem
                    --> transformRangeSubselect
                        --> parse_sub_analyze // 处理子查询，生成子查询的查询树Query
                            --> transformStmt
                                --> transformValuesClause  // 处理子查询中的values表达式
                                    --> transformExpressionList
                                        --> transformExpr
                                    --> addRangeTableEntryForValues
                        --> addRangeTableEntryForSubquery //将values expr list 放到rte->values_lists字段中
            --> transformTargetList
--> pg_rewrite_query

关键数据结构：

c 复制代码

// 范围表类型
typedef enum RTEKind
{
	RTE_RELATION,				/* ordinary relation reference */
	RTE_SUBQUERY,				/* subquery in FROM */
	RTE_JOIN,					/* join */
	RTE_FUNCTION,				/* function in FROM */
	RTE_TABLEFUNC,				/* TableFunc(.., column list) */
	RTE_VALUES,					/* VALUES (<exprlist>), (<exprlist>), ... */
	RTE_CTE,					/* common table expr (WITH list element) */
	RTE_NAMEDTUPLESTORE,		/* tuplestore, e.g. for AFTER triggers */
	RTE_RESULT					/* RTE represents an empty FROM clause; such
								 * RTEs are added by the planner, they're not
								 * present during parsing or rewriting */
} RTEKind;
// 范围表
typedef struct RangeTblEntry
{
	NodeTag		type;
	RTEKind		rtekind;		/* see above */
	/*
	 * Fields valid for a plain relation RTE (else zero):
	 */
	Oid			relid;			/* OID of the relation */
	char		relkind;		/* relation kind (see pg_class.relkind) */
	int			rellockmode;	/* lock level that query requires on the rel */
	struct TableSampleClause *tablesample;	/* sampling info, or NULL */

	/*
	 * Fields valid for a subquery RTE (else NULL):
	 */
	Query	   *subquery;		/* the sub-query */
	bool		security_barrier;	/* is from security_barrier view? */

	/*
	 * Fields valid for a join RTE (else NULL/zero):
	 */
	JoinType	jointype;		/* type of join */
	int			joinmergedcols; /* number of merged (JOIN USING) columns */
	List	   *joinaliasvars;	/* list of alias-var expansions */
	List	   *joinleftcols;	/* left-side input column numbers */
	List	   *joinrightcols;	/* right-side input column numbers */

	/*
	 * Fields valid for a function RTE (else NIL/zero):
	 */
	List	   *functions;		/* list of RangeTblFunction nodes */
	bool		funcordinality; /* is this called WITH ORDINALITY? */

	/*
	 * Fields valid for a TableFunc RTE (else NULL):
	 */
	TableFunc  *tablefunc;

	/*
	 * Fields valid for a values RTE (else NIL):
	 */
	List	   *values_lists;	/* list of expression lists */  // Values表达式列表

	/*
	 * Fields valid for a CTE RTE (else NULL/zero):
	 */
	char	   *ctename;		/* name of the WITH list item */
	Index		ctelevelsup;	/* number of query levels up */
	bool		self_reference; /* is this a recursive self-reference? */

	/*
	 * Fields valid for CTE, VALUES, ENR, and TableFunc RTEs (else NIL):
	 */
	List	   *coltypes;		/* OID list of column type OIDs */
	List	   *coltypmods;		/* integer list of column typmods */
	List	   *colcollations;	/* OID list of column collation OIDs */

	/*
	 * Fields valid for ENR RTEs (else NULL/zero):
	 */
	char	   *enrname;		/* name of ephemeral named relation */
	double		enrtuples;		/* estimated or actual from caller */

	/*
	 * Fields valid in all RTEs:
	 */
	Alias	   *alias;			/* user-written alias clause, if any */
	Alias	   *eref;			/* expanded reference names */
	bool		lateral;		/* subquery, function, or values is LATERAL? */
	bool		inh;			/* inheritance requested? */
	bool		inFromCl;		/* present in FROM clause? */
	AclMode		requiredPerms;	/* bitmask of required access permissions */
	Oid			checkAsUser;	/* if valid, check access as this role */
	Bitmapset  *selectedCols;	/* columns needing SELECT permission */
	Bitmapset  *insertedCols;	/* columns needing INSERT permission */
	Bitmapset  *updatedCols;	/* columns needing UPDATE permission */
	Bitmapset  *extraUpdatedCols;	/* generated columns being updated */
	List	   *securityQuals;	/* security barrier quals to apply, if any */
	const char *qb_name;
} RangeTblEntry;

关键处理逻辑：

c 复制代码

// 子查询
SelectStmt
{
    valuesLists:   `(Values (1,1),(2,2))`
}

会将valuesLists转为RangeTblEntry，相关信息保存到values_list字段中。子查询SelectStmt转为子查询树Query。

c 复制代码

// 子查询范围表
RangeSubselect
{
    subquery:  SelectStmt  (Subquery is (Values (1,1),(2,2)))
    alias:    as foo
}

RangeSubselect转为父查询的RangeTblEntry，

c 复制代码

// select * from Subquery,  Subquery is (Values (1,1),(2,2)) as foo;
SelectStmt  (top-sql)
{
    targetList :  '*'
    fromClause :  RangeSubselect
}

SelectStmt转为top查询树Query

优化器

通过查询树生成最优路径，再生成最优执行计划。

优化器主流程如下：

c 复制代码

pg_plan_queries
--> pg_plan_query
    --> planner
        --> standard_planner
            --> subquery_planner
                --> pull_up_subqueries  // 上拉子查询
                --> grouping_planner
                    --> query_planner
                        --> setup_simple_rel_arrays
                        --> add_base_rels_to_query
                            --> build_simple_rel
                        --> make_one_rel
                            --> set_base_rel_sizes
                                --> set_rel_size
                                    --> set_values_size_estimates
                                        --> set_baserel_size_estimates
                            --> set_base_rel_pathlists
                                --> set_rel_pathlist
                                    --> set_values_pathlist
                                        --> create_valuesscan_path  // 生成ValueScanPath
                            --> make_rel_from_joinlist
                    --> apply_scanjoin_target_to_paths
            --> create_plan
                --> create_scan_plan
                    --> create_valuesscan_plan

重点函数分析：最重要的是看一下VauleScan执行计划的生成

c 复制代码

/*
 * create_valuesscan_plan
 *	 Returns a valuesscan plan for the base relation scanned by 'best_path'
 *	 with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
ValuesScan *
create_valuesscan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses)
{
	ValuesScan *scan_plan;
	Index		scan_relid = best_path->parent->relid;
	RangeTblEntry *rte;
	List	   *values_lists;

	rte = planner_rt_fetch(scan_relid, root);
	values_lists = rte->values_lists;       //存放在RangeTblEntry中的values_lists

	/* Sort clauses into best execution order */
	scan_clauses = order_qual_clauses(root, scan_clauses);

	/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
	scan_clauses = extract_actual_clauses(scan_clauses, false);

	/* Replace any outer-relation variables with nestloop params */
	if (best_path->param_info)
	{
		scan_clauses = (List *)replace_nestloop_params(root, (Node *) scan_clauses);
		/* The values lists could contain nestloop params, too */
		values_lists = (List *)replace_nestloop_params(root, (Node *) values_lists);
	}

	scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, values_lists);
	copy_generic_path_info(&scan_plan->scan.plan, best_path);

	return scan_plan;
}

ValuesScan *
make_valuesscan(List *qptlist,List *qpqual,Index scanrelid,List *values_lists)
{
	ValuesScan *node = makeNode(ValuesScan);
	Plan	   *plan = &node->scan.plan;

	plan->targetlist = qptlist;
	plan->qual = qpqual;
	plan->lefttree = NULL;
	plan->righttree = NULL;
	node->scan.scanrelid = scanrelid;
	node->values_lists = values_lists;  // values expr list

	return node;
}

最终的执行计划：

sql 复制代码

postgres@postgres=# explain select * from (values (1,1), (2,2)) as foo;
                         QUERY PLAN                          
-------------------------------------------------------------
 Values Scan on "*VALUES*"  (cost=0.00..0.03 rows=2 width=8)
(1 row)

执行器

输入执行计划，输出最终结果。

c 复制代码

PortalStart
--> ExecutorStart
    --> InitPlan
        --> ExecInitValuesScan
PortalRun
--> ExecutorRun
    --> ExecutePlan
        --> ExecValuesScan   // 从这里开始，ExecValuesScan、ExecScan、ExecScanFetch、ValuesNext都要读懂
            --> ExecScan
                --> ExecScanFetch
                    --> ValuesNext   // 从 values expr list中获取值 转为tuple
PortalDrop

执行器算子实现源码如下：因为这几个算子都是比较重要且高频的算子，所以我们把他的源码全部列了出来，要把整个都读懂才行。

c 复制代码

/* ----------------------------------------------------------------
 *		ExecValuesScan(node)
 *
 *		Scans the values lists sequentially and returns the next qualifying tuple.
 *		We call the ExecScan() routine and pass it the appropriate access method functions.
 * ----------------------------------------------------------------*/
TupleTableSlot *ExecValuesScan(PlanState *pstate)
{
	ValuesScanState *node = castNode(ValuesScanState, pstate);

	return ExecScan(&node->ss,
					(ExecScanAccessMtd) ValuesNext,   // 这个参数最重要，access method实现
					(ExecScanRecheckMtd) ValuesRecheck);
}

/* ----------------------------------------------------------------
 *		ExecScan
 *
 *		Scans the relation using the 'access method' indicated and
 *		returns the next qualifying tuple.
 *		The access method returns the next tuple and ExecScan() is
 *		responsible for checking the tuple returned against the qual-clause.
 *
 *		A 'recheck method' must also be provided that can check an
 *		arbitrary tuple of the relation against any qual conditions
 *		that are implemented internal to the access method.
 *
 *		Conditions:
 *		  -- the "cursor" maintained by the AMI is positioned at the tuple
 *			 returned previously.
 *
 *		Initial States:
 *		  -- the relation indicated is opened for scanning so that the
 *			 "cursor" is positioned before the first qualifying tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
{
	ExprContext *econtext;
	ExprState  *qual;
	ProjectionInfo *projInfo;

	/* Fetch data from node */
	qual = node->ps.qual;
	projInfo = node->ps.ps_ProjInfo;
	econtext = node->ps.ps_ExprContext;

	/* If we have neither a qual to check nor a projection to do, just skip
	 * all the overhead and return the raw scan tuple.*/
	if (!qual && !projInfo)
	{
		ResetExprContext(econtext);
		return ExecScanFetch(node, accessMtd, recheckMtd);
	}

	/* Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous tuple cycle.*/
	ResetExprContext(econtext);

	/* get a tuple from the access method.  Loop until we obtain a tuple that
	 * passes the qualification. */
	for (;;)
	{
		TupleTableSlot *slot;
		slot = ExecScanFetch(node, accessMtd, recheckMtd);

		/* if the slot returned by the accessMtd contains NULL, then it means
		 * there is nothing more to scan so we just return an empty slot,
		 * being careful to use the projection result slot so it has correct tupleDesc. */
		if (TupIsNull(slot))
		{
			if (projInfo)
				return ExecClearTuple(projInfo->pi_state.resultslot);
			else
				return slot;
		}

		/* place the current tuple into the expr context */
		econtext->ecxt_scantuple = slot;

		/* check that the current tuple satisfies the qual-clause
		 *
		 * check for non-null qual here to avoid a function call to ExecQual()
		 * when the qual is null ... saves only a few cycles, but they add up ... */
		if (qual == NULL || ExecQual(qual, econtext))
		{
			/* Found a satisfactory scan tuple. */
			if (projInfo)
			{
				/* Form a projection tuple, store it in the result tuple slot and return it. */
				return ExecProject(projInfo);
			} else
			{
				return slot;  /* Here, we aren't projecting, so just return scan tuple. */
			}
		}
		else
			InstrCountFiltered1(node, 1);

		/* Tuple fails qual, so free per-tuple memory and try again. */
		ResetExprContext(econtext);
	}
}


/* ----------------------------------------------------------------
 *		ValuesNext
 *
 *		This is a workhorse for ExecValuesScan
 * ----------------------------------------------------------------*/
TupleTableSlot *ValuesNext(ValuesScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	int			curr_idx;

	/* get information from the estate and scan state */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	slot = node->ss.ss_ScanTupleSlot;
	econtext = node->rowcontext;

	/* Get the next tuple. Return NULL if no more tuples. */
	if (ScanDirectionIsForward(direction))
	{
		if (node->curr_idx < node->array_len)
			node->curr_idx++;
	} else
	{
		if (node->curr_idx >= 0)
			node->curr_idx--;
	}

	/* Always clear the result slot; this is appropriate if we are at the end
	 * of the data, and if we're not, we still need it as the first step of
	 * the store-virtual-tuple protocol.  It seems wise to clear the slot
	 * before we reset the context it might have pointers into. */
	ExecClearTuple(slot);

	curr_idx = node->curr_idx;
	if (curr_idx >= 0 && curr_idx < node->array_len)
	{
		List	   *exprlist = node->exprlists[curr_idx];
		List	   *exprstatelist = node->exprstatelists[curr_idx];
		MemoryContext oldContext;
		Datum	   *values;
		bool	   *isnull;
		ListCell   *lc;
		int			resind;

		/* Get rid of any prior cycle's leftovers.  We use ReScanExprContext
		 * not just ResetExprContext because we want any registered shutdown callbacks to be called. */
		ReScanExprContext(econtext);

		/* Do per-VALUES-row work in the per-tuple context.*/
		oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

		/*
		 * Unless we already made the expression eval state for this row,
		 * build it in the econtext's per-tuple memory.  This is a tad
		 * unusual, but we want to delete the eval state again when we move to
		 * the next row, to avoid growth of memory requirements over a long
		 * values list.  For rows in which that won't work, we already built
		 * the eval state at plan startup.
		 */
		if (exprstatelist == NIL)
		{
			/*
			 * Pass parent as NULL, not my plan node, because we don't want
			 * anything in this transient state linking into permanent state.
			 * The only expression type that might wish to do so is a SubPlan,
			 * and we already checked that there aren't any.
			 *
			 * Note that passing parent = NULL also disables JIT compilation
			 * of the expressions, which is a win, because they're only going
			 * to be used once under normal circumstances.
			 */
			exprstatelist = ExecInitExprList(exprlist, NULL);
		}

		/*
		 * Compute the expressions and build a virtual result tuple. We
		 * already did ExecClearTuple(slot).
		 */
		values = slot->tts_values;
		isnull = slot->tts_isnull;

		resind = 0;
		foreach(lc, exprstatelist)
		{
			ExprState  *estate = (ExprState *) lfirst(lc);
			Form_pg_attribute attr = TupleDescAttr(slot->tts_tupleDescriptor, resind);

			values[resind] = ExecEvalExpr(estate, econtext, &isnull[resind]);

			/*
			 * We must force any R/W expanded datums to read-only state, in
			 * case they are multiply referenced in the plan node's output
			 * expressions, or in case we skip the output projection and the
			 * output column is multiply referenced in higher plan nodes.
			 */
			values[resind] = MakeExpandedObjectReadOnly(values[resind],isnull[resind],attr->attlen);

			resind++;
		}

		MemoryContextSwitchTo(oldContext);
		ExecStoreVirtualTuple(slot);  		/* And return the virtual tuple. */
	}

	return slot;
}