/**
 * Simplifies LIKE expressions that do not need full regular expressions to evaluate the
 * condition. For example, when the expression is just checking to see if a string starts with
 * a given pattern.
 *
 * Rewrites performed for patterns that contain neither `_` nor the escape character:
 *  - `abc%`    becomes `StartsWith(input, "abc")`
 *  - `%abc`    becomes `EndsWith(input, "abc")`
 *  - `abc%def` becomes a length check AND `StartsWith` AND `EndsWith`
 *  - `%abc%`   becomes `Contains(input, "abc")`
 *  - `abc`     becomes `EqualTo(input, "abc")`
 *
 * Any other pattern is left untouched.
 */
object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper {
  // Patterns containing the escape character are rejected up front in `simplifyLike`, so these
  // regexes never have to reason about escaped wildcards.
  // Cases like "something\%" are not optimized, but this does not affect correctness.
  private val startsWith = "([^_%]+)%".r
  private val endsWith = "%([^_%]+)".r
  private val startsAndEndsWith = "([^_%]+)%([^_%]+)".r
  private val contains = "%([^_%]+)%".r
  private val equalTo = "([^_%]*)".r

  /**
   * Number of Unicode code points in `s`.
   *
   * `String.length` counts UTF-16 code units, so a supplementary character (e.g. an emoji)
   * counts twice there, while Spark SQL's `length` is documented as returning the character
   * length of the string. Using code points keeps the two sides of the comparison in the
   * `startsAndEndsWith` rewrite in the same unit.
   */
  private def numCodePoints(s: String): Int = s.codePointCount(0, s.length)

  /**
   * Tries to replace a single `input LIKE pattern` by a cheaper string predicate.
   *
   * @param input      the expression on the left-hand side of LIKE
   * @param pattern    the (non-null) LIKE pattern
   * @param escapeChar the escape character of the LIKE expression
   * @return the replacement predicate, or `None` when the pattern cannot be simplified
   */
  private def simplifyLike(
      input: Expression, pattern: String, escapeChar: Char = '\\'): Option[Expression] = {
    if (pattern.contains(escapeChar)) {
      // There are three different situations when pattern containing escapeChar:
      // 1. pattern contains invalid escape sequence, e.g. 'm\aca'
      // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca'
      // 3. pattern contains escaped escape character, e.g. 'ma\\ca'
      // Although there are patterns can be optimized if we handle the escape first, we just
      // skip this rule if pattern contains any escapeChar for simplicity.
      None
    } else {
      pattern match {
        case startsWith(prefix) =>
          Some(StartsWith(input, Literal(prefix)))
        case endsWith(postfix) =>
          Some(EndsWith(input, Literal(postfix)))
        // 'a%a' pattern is basically same with 'a%' && '%a'.
        // However, the additional `Length` condition is required to prevent 'a' match 'a%a'.
        // The bound is expressed in code points (not UTF-16 units); otherwise an input such as
        // two emoji would be wrongly rejected for the pattern '<emoji>%<emoji>'.
        case startsAndEndsWith(prefix, postfix) =>
          val minLength = numCodePoints(prefix) + numCodePoints(postfix)
          Some(And(GreaterThanOrEqual(Length(input), Literal(minLength)),
            And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))))
        case contains(infix) =>
          Some(Contains(input, Literal(infix)))
        case equalTo(str) =>
          Some(EqualTo(input, Literal(str)))
        case _ => None
      }
    }
  }

  /**
   * Simplifies the patterns of a multi-LIKE expression one by one.
   *
   * Every pattern that `simplifyLike` can handle is turned into a cheap predicate; the
   * remaining patterns (including null ones) stay in a copy of the original multi-LIKE
   * expression. Both parts are then combined with AND (for the `*All` variants) or OR (for
   * the `*Any` variants), negating the cheap predicates for the `Not*` variants.
   *
   * @param child    the expression all patterns are matched against
   * @param patterns the patterns of `multi`
   * @param multi    the original multi-LIKE expression, returned unchanged when no pattern
   *                 could be simplified
   */
  private def simplifyMultiLike(
      child: Expression, patterns: Seq[UTF8String], multi: MultiLikeBase): Expression = {
    // Pair every pattern with its simplification attempt; null patterns are never simplified.
    val attempts = patterns.map { p =>
      p -> Option(p).flatMap(nonNull => simplifyLike(child, nonNull.toString))
    }
    val remainPatterns = attempts.collect { case (p, None) => p }
    val replacements = attempts.flatMap(_._2)
    if (replacements.isEmpty) {
      multi
    } else {
      multi match {
        case l: LikeAll =>
          val and = buildBalancedPredicate(replacements, And)
          if (remainPatterns.nonEmpty) And(and, l.copy(patterns = remainPatterns)) else and
        case l: NotLikeAll =>
          val and = buildBalancedPredicate(replacements.map(Not(_)), And)
          if (remainPatterns.nonEmpty) And(and, l.copy(patterns = remainPatterns)) else and
        case l: LikeAny =>
          val or = buildBalancedPredicate(replacements, Or)
          if (remainPatterns.nonEmpty) Or(or, l.copy(patterns = remainPatterns)) else or
        case l: NotLikeAny =>
          val or = buildBalancedPredicate(replacements.map(Not(_)), Or)
          if (remainPatterns.nonEmpty) Or(or, l.copy(patterns = remainPatterns)) else or
      }
    }
  }

  /**
   * Applies the simplification to every LIKE-family expression of `plan`.
   *
   * Multi-LIKE expressions are only rewritten when `CollapseProject.isCheap(child)` holds,
   * because the rewrite references `child` once per simplified pattern.
   */
  // NOTE(review): `LIKE_FAMLIY` looks like a misspelling of LIKE_FAMILY; it is kept as is
  // because the identifier is declared outside of this file section.
  def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning(
    _.containsPattern(LIKE_FAMLIY), ruleId) {
    case l @ Like(input, Literal(pattern, StringType), escapeChar) =>
      if (pattern == null) {
        // If pattern is null, return null value directly, since "col like null" == null.
        Literal(null, BooleanType)
      } else {
        simplifyLike(input, pattern.toString, escapeChar).getOrElse(l)
      }
    case l @ LikeAll(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
    case l @ NotLikeAll(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
    case l @ LikeAny(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
    case l @ NotLikeAny(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
  }
}
// Tests
// Test snippet 1 (Scala):
// Smoke test (no assertions): runs a NOT LIKE query with adaptive query execution enabled and
// force-applied, then prints the result and its schema. The pattern '%HotFocus%' has the
// `%infix%` shape that LikeSimplification's `contains` regex accepts.
test("test data, force apply AQE") {
  withSQLConf(
    SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
    SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
    val df = sql("SELECT * FROM testData where value not like '%HotFocus%'")
    // Side-effecting calls keep their explicit `()`.
    df.show()
    df.printSchema()
  }
}
// Test snippet 2 (Scala):
// Smoke test (no assertions): same query shape as the '%HotFocus%' test, but with doubled
// wildcards. '%%HotFocus%%' matches none of LikeSimplification's regexes (each capture group
// excludes '%'), so the LIKE expression is expected to stay unsimplified here.
test("test data like, force apply AQE") {
  withSQLConf(
    SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
    SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
    val df = sql("SELECT * FROM testData where value not like '%%HotFocus%%'")
    // Side-effecting calls keep their explicit `()`.
    df.show()
    df.printSchema()
  }
}