PostgreSQL源码分析——常量表达式化简

常量表达式化简

常量表达式可以进行化简，可降低执行器计算表达式的代价。在逻辑优化阶段，会判断是否可以进行常量表达式化简，如果可以，则在执行器执行之前就预先对常量表达式树进行计算，计算出常量后，以新计算出的常量表达式代替原有的表达式。当进入执行器时，此时表达式已被替换为常量，避免了在执行器中频繁的计算表达式。

化简示例

我们以下面的SQL为例，条件表达式为a > 1 + 1，是常量表达式，可以进行化简。

sql 复制代码

-- 常量表达式可以进行化简
postgres=# explain select * from t1 where a >  1 + 1;   
                     QUERY PLAN                      
-----------------------------------------------------
 Seq Scan on t1  (cost=0.00..17.50 rows=998 width=8)
   Filter: (a > 2)   -- 进入执行器中， 表达式已被提前替换为常量，每扫描一个元组，不用再进行1+1的表达式计算了
(2 rows)
-- 不满足常量表达式化简的条件
postgres=# explain select * from t1 where a >  1 + random();  
                               QUERY PLAN                               
------------------------------------------------------------------------
 Seq Scan on t1  (cost=0.00..25.00 rows=333 width=8)
   Filter: ((a)::double precision > ('1'::double precision + random()))     --  执行器中每扫描一个元组，都要进行一次表达式计算
(2 rows)

除了常量表达式，常量函数也可以在逻辑优化阶段提前执行，计算得到常量，用常量替换原有的函数表达式。

源码分析

我们以select * from t1 where a = 1 + 1;这条语句为例，分析一下在PostgreSQL中是如何进行化简的。主流程如下

c 复制代码

exec_simple_query
--> pg_parse_query
    --> raw_parser
        --> base_yyparse
--> pg_analyze_and_rewrite
    --> transformStmt
        --> transformSelectStmt
            --> transformWhereClause
                --> transformExpr
--> pg_plan_queries
    --> pg_plan_query
        --> planner
            --> standard_planner
                --> subquery_planner
                    --> preprocess_qual_conditions
                        --> preprocess_expression
                            --> eval_const_expressions  // 在这里完成常量表达式化简，完成1+1表达式替换为常量2
                                --> eval_const_expressions_mutator
                                    --> simplify_function
                                        --> evaluate_function
                                            --> evaluate_expr  // 1+1的表达式被计算为常量2
                --> create_plan
--> PortalStart
--> PortalRun  // 执行器执行
--> PortalDrop

可以看到，在逻辑优化阶段，常量表达式已被化简为常量，进入执行器时，表达式树已被替换为常量。下面为详细实现。

c 复制代码

/*
 * preprocess_qual_conditions
 *		Recursively scan the query's jointree and do subquery_planner's
 *		preprocessing work on each qual condition found therein.
 */
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
{
	if (jtnode == NULL)
		return;
	if (IsA(jtnode, RangeTblRef))
	{
		/* nothing to do here */
	}
	else if (IsA(jtnode, FromExpr))
	{
		FromExpr   *f = (FromExpr *) jtnode;
		ListCell   *l;

		foreach(l, f->fromlist)
			preprocess_qual_conditions(root, lfirst(l));

		f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
	}
	else if (IsA(jtnode, JoinExpr))
	{
        // ...
	}
	else
		elog(ERROR, "unrecognized node type: %d",
			 (int) nodeTag(jtnode));
}

/*
 * preprocess_expression
 *		Do subquery_planner's preprocessing work for an expression,
 *		which can be a targetlist, a WHERE clause (including JOIN/ON
 *		conditions), a HAVING clause, or a few other things.
 */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind)
{
    // ...

	/*
	 * Simplify constant expressions.  For function RTEs, this was already
	 * done by preprocess_function_rtes.  
	 */
	if (kind != EXPRKIND_RTFUNC)
		expr = eval_const_expressions(root, expr);

	/* If it's a qual or havingQual, canonicalize it. */
	if (kind == EXPRKIND_QUAL)
		expr = (Node *) canonicalize_qual((Expr *) expr, false);

    // ...

	/*
	 * If it's a qual or havingQual, convert it to implicit-AND format. (We
	 * don't want to do this before eval_const_expressions, since the latter
	 * would be unable to simplify a top-level AND correctly. Also,
	 * SS_process_sublinks expects explicit-AND format.)
	 */
	if (kind == EXPRKIND_QUAL)
		expr = (Node *) make_ands_implicit((Expr *) expr);

	return expr;
}

/*--------------------
 * eval_const_expressions
 *
 * Reduce any recognizably constant subexpressions of the given
 * expression tree, for example "2 + 2" => "4".  More interestingly,
 * we can reduce certain boolean expressions even when they contain
 * non-constant subexpressions: "x OR true" => "true" no matter what
 * the subexpression x is.  */
Node *eval_const_expressions(PlannerInfo *root, Node *node)
{
	eval_const_expressions_context context;

	if (root)
		context.boundParams = root->glob->boundParams;	/* bound Params */
	else
		context.boundParams = NULL;
	context.root = root;		/* for inlined-function dependencies */
	context.active_fns = NIL;	/* nothing being recursively simplified */
	context.case_val = NULL;	/* no CASE being examined */
	context.estimate = false;	/* safe transformations only */
	return eval_const_expressions_mutator(node, &context);
}

/* Recursive guts of eval_const_expressions/estimate_expression_value */
static Node *eval_const_expressions_mutator(Node *node,
							   eval_const_expressions_context *context)
{
	if (node == NULL)
		return NULL;
	switch (nodeTag(node))
	{
		case T_FuncExpr:
			{
                // ...
			}
		case T_OpExpr:
			{
				OpExpr	   *expr = (OpExpr *) node;
				List	   *args = expr->args;
				Expr	   *simple;
				OpExpr	   *newexpr;

				/* Need to get OID of underlying function.  Okay to scribble on input to this extent. */
				set_opfuncid(expr);

				/*
				 * Code for op/func reduction is pretty bulky, so split it out
				 * as a separate function. */
				simple = simplify_function(expr->opfuncid,
										   expr->opresulttype, -1,
										   expr->opcollid,
										   expr->inputcollid,
										   &args,
										   false,
										   true,
										   true,
										   context);
				if (simple)		/* successfully simplified it */
					return (Node *) simple;

				/*
				 * If the operator is boolean equality or inequality, we know
				 * how to simplify cases involving one constant and one
				 * non-constant argument.
				 */
				if (expr->opno == BooleanEqualOperator ||
					expr->opno == BooleanNotEqualOperator)
				{
					simple = (Expr *) simplify_boolean_equality(expr->opno,
																args);
					if (simple) /* successfully simplified it */
						return (Node *) simple;
				}

				/*
				 * The expression cannot be simplified any further, so build
				 * and return a replacement OpExpr node using the
				 * possibly-simplified arguments.
				 */
				newexpr = makeNode(OpExpr);
				newexpr->opno = expr->opno;
				newexpr->opfuncid = expr->opfuncid;
				newexpr->opresulttype = expr->opresulttype;
				newexpr->opretset = expr->opretset;
				newexpr->opcollid = expr->opcollid;
				newexpr->inputcollid = expr->inputcollid;
				newexpr->args = args;
				newexpr->location = expr->location;
				return (Node *) newexpr;
			}
        // ...
		default:
			break;
	}

	/*
	 * For any node type not handled above, copy the node unchanged but
	 * const-simplify its subexpressions.  This is the correct thing for node
	 * types whose behavior might change between planning and execution, such
	 * as CurrentOfExpr.  It's also a safe default for new node types not
	 * known to this routine.
	 */
	return ece_generic_processing(node);
}

/*
 * Subroutine for eval_const_expressions: try to simplify a function call
 * (which might originally have been an operator; we don't care)
 */
static Expr *
simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
				  Oid result_collid, Oid input_collid, List **args_p,
				  bool funcvariadic, bool process_args, bool allow_non_const,
				  eval_const_expressions_context *context)
{
    // ...
	/* Now attempt simplification of the function call proper. */
	newexpr = evaluate_function(funcid, result_type, result_typmod,
								result_collid, input_collid,
								args, funcvariadic,
								func_tuple, context);

    // ...

	return newexpr;
}

/*
 * evaluate_function: try to pre-evaluate a function call
 */
static Expr *
evaluate_function(Oid funcid, Oid result_type, int32 result_typmod,
				  Oid result_collid, Oid input_collid, List *args,
				  bool funcvariadic,
				  HeapTuple func_tuple,
				  eval_const_expressions_context *context)
{
    // ...

	return evaluate_expr((Expr *) newexpr, result_type, result_typmod,
						 result_collid);
}

/*
 * evaluate_expr: pre-evaluate a constant expression
 *
 * We use the executor's routine ExecEvalExpr() to avoid duplication of
 * code and ensure we get the same result as the executor would get.
 */
Expr *
evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
			  Oid result_collation)
{
	EState	   *estate;
	ExprState  *exprstate;
	MemoryContext oldcontext;
	Datum		const_val;
	bool		const_is_null;
	int16		resultTypLen;
	bool		resultTypByVal;

	/*
	 * To use the executor, we need an EState.
	 */
	estate = CreateExecutorState();

	/* We can use the estate's working context to avoid memory leaks. */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Make sure any opfuncids are filled in. */
	fix_opfuncids((Node *) expr);

	/*
	 * Prepare expr for execution.  (Note: we can't use ExecPrepareExpr
	 * because it'd result in recursively invoking eval_const_expressions.)
	 */
	exprstate = ExecInitExpr(expr, NULL);

	/*
	 * And evaluate it.
	 *
	 * It is OK to use a default econtext because none of the ExecEvalExpr()
	 * code used in this situation will use econtext.  That might seem
	 * fortuitous, but it's not so unreasonable --- a constant expression does
	 * not depend on context, by definition, n'est ce pas?
	 */
	const_val = ExecEvalExprSwitchContext(exprstate,
										  GetPerTupleExprContext(estate),
										  &const_is_null);  // 计算表达式 1 + 1

	/* Get info needed about result datatype */
	get_typlenbyval(result_type, &resultTypLen, &resultTypByVal);

	/* Get back to outer memory context */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Must copy result out of sub-context used by expression eval.
	 *
	 * Also, if it's varlena, forcibly detoast it.  This protects us against
	 * storing TOAST pointers into plans that might outlive the referenced
	 * data.  (makeConst would handle detoasting anyway, but it's worth a few
	 * extra lines here so that we can do the copy and detoast in one step.)
	 */
	if (!const_is_null)
	{
		if (resultTypLen == -1)
			const_val = PointerGetDatum(PG_DETOAST_DATUM_COPY(const_val));
		else
			const_val = datumCopy(const_val, resultTypByVal, resultTypLen);
	}

	/* Release all the junk we just created */
	FreeExecutorState(estate);

	/*
	 * Make the constant result node.
	 */
	return (Expr *) makeConst(result_type, result_typmod, result_collation,
							  resultTypLen,
							  const_val, const_is_null,
							  resultTypByVal);
}

如果对源码不熟悉，可以补充看一下这里关于解析层的代码，有助于理解PostgreSQL源码中表达式处理相关的逻辑。

补充解析层相关代码

我们看一下在解析层表达式是如何表示的，1+1表示为A_Expr，类型为AEXPR_OP。where a = 1 + 1则是A_Expr，左子树为变量a，右子树为1+1的表达式A_Expr。

c 复制代码

where_clause:
			WHERE a_expr		{ $$ = $2; }
			| /*EMPTY*/			{ $$ = NULL; }
		;
a_expr:		c_expr			{ $$ = $1; }
			| a_expr '+' a_expr     // 表示 1 + 1
				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); }
			| a_expr '=' a_expr
				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "=", $1, $3, @2); }
c_expr:		columnref		{ $$ = $1; }
			| AexprConst	{ $$ = $1; }
AexprConst: Iconst
				{
					$$ = makeIntConst($1, @1);
				}
Iconst:		ICONST	{ $$ = $1; };       // 1 

A_Expr *
makeSimpleA_Expr(A_Expr_Kind kind, char *name,
				 Node *lexpr, Node *rexpr, int location)
{
	A_Expr	   *a = makeNode(A_Expr);

	a->kind = kind;
	a->name = list_make1(makeString((char *) name));
	a->lexpr = lexpr;
	a->rexpr = rexpr;
	a->location = location;
	return a;
}

typedef struct A_Expr
{
	NodeTag		type;
	A_Expr_Kind kind;			/* see above */
	List	   *name;			/* possibly-qualified name of operator */
	Node	   *lexpr;			/* left argument, or NULL if none */
	Node	   *rexpr;			/* right argument, or NULL if none */
	int			location;		/* token location, or -1 if unknown */
} A_Expr;

/* A_Const - a literal constant */
typedef struct A_Const
{
	NodeTag		type;
	Value		val;			/* value (includes type info, see value.h) */
	int			location;		/* token location, or -1 if unknown */
} A_Const;