Spark 之 expression

##.

复制代码
/**
 * Returns the number of days from startDate to endDate.
 */
@ExpressionDescription(
  usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.",
  examples = """
    Examples:
      > SELECT _FUNC_('2009-07-31', '2009-07-30');
       1

      > SELECT _FUNC_('2009-07-30', '2009-07-31');
       -1
  """,
  group = "datetime_funcs",
  since = "1.5.0")
case class DateDiff(endDate: Expression, startDate: Expression)
  extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {

  override def left: Expression = endDate
  override def right: Expression = startDate
  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)
  override def dataType: DataType = IntegerType

  override def nullSafeEval(end: Any, start: Any): Any = {
    end.asInstanceOf[Int] - start.asInstanceOf[Int]
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    defineCodeGen(ctx, ev, (end, start) => s"$end - $start")
  }

  override protected def withNewChildrenInternal(
      newLeft: Expression, newRight: Expression): DateDiff =
    copy(endDate = newLeft, startDate = newRight)
}
允许有默认参数的表达式
复制代码
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_([sourceTz, ]targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ",
  arguments = """
    Arguments:
      * sourceTz - the time zone for the input timestamp.
                   If it is missed, the current session time zone is used as the source time zone.
      * targetTz - the time zone to which the input timestamp should be converted
      * sourceTs - a timestamp without time zone
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');
       2021-12-05 15:00:00
      > SELECT _FUNC_('Europe/Brussels', timestamp_ntz'2021-12-05 15:00:00');
       2021-12-06 00:00:00
  """,
  group = "datetime_funcs",
  since = "3.4.0")
// scalastyle:on line.size.limit
case class ConvertTimezone(
    sourceTz: Expression,
    targetTz: Expression,
    sourceTs: Expression)
  extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant {

  def this(targetTz: Expression, sourceTs: Expression) =
    this(CurrentTimeZone(), targetTz, sourceTs)

  override def first: Expression = sourceTz
  override def second: Expression = targetTz
  override def third: Expression = sourceTs

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, TimestampNTZType)
  override def dataType: DataType = TimestampNTZType

  override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = {
    DateTimeUtils.convertTimestampNtzToAnotherTz(
      srcTz.asInstanceOf[UTF8String].toString,
      tgtTz.asInstanceOf[UTF8String].toString,
      micros.asInstanceOf[Long])
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    defineCodeGen(ctx, ev, (srcTz, tgtTz, micros) =>
      s"""$dtu.convertTimestampNtzToAnotherTz($srcTz.toString(), $tgtTz.toString(), $micros)""")
  }

  override def prettyName: String = "convert_timezone"

  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): ConvertTimezone = {
    copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird)
  }
}
时间函数
复制代码
  /**
   * Gets the difference between two timestamps.
   *
   * @param unit Specifies the interval units in which to express the difference between
   *             the two timestamp parameters.
   * @param startTs A timestamp which the function subtracts from `endTs`.
   * @param endTs A timestamp from which the function subtracts `startTs`.
   * @param zoneId The time zone ID at which the operation is performed.
   * @return The time span between two timestamp values, in the units specified.
   */
  def timestampDiff(unit: String, startTs: Long, endTs: Long, zoneId: ZoneId): Long = {
    val unitInUpperCase = unit.toUpperCase(Locale.ROOT)
    if (timestampDiffMap.contains(unitInUpperCase)) {
      val startLocalTs = getLocalDateTime(startTs, zoneId)
      val endLocalTs = getLocalDateTime(endTs, zoneId)
      timestampDiffMap(unitInUpperCase)(startLocalTs, endLocalTs)
    } else {
      throw new IllegalStateException(s"Got the unexpected unit '$unit'.")
    }
  }
TimestampDiff

旧版表达式写法

复制代码
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_('MONTH', timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: Expression,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends TernaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  def this(unit: Expression, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def first: Expression = unit
  override def second: Expression = startTimestamp
  override def third: Expression = endTimestamp

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType, TimestampType)
  override def dataType: DataType = LongType

  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  override def nullSafeEval(u: Any, startMicros: Any, endMicros: Any): Any = {
    DateTimeUtils.timestampDiff(
      u.asInstanceOf[UTF8String].toString,
      startMicros.asInstanceOf[Long],
      endMicros.asInstanceOf[Long],
      zoneIdInEval)
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (u, s, e) =>
      s"""$dtu.timestampDiff($u.toString(), $s, $e, $zid)""")
  }

  override def prettyName: String = "timestampdiff"

  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): TimestampDiff = {
    copy(unit = newFirst, startTimestamp = newSecond, endTimestamp = newThird)
  }
}

新版 String 传参数 写法

复制代码
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_(HOUR, timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_(MONTH, timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: String,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends BinaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  def this(unit: String, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def left: Expression = startTimestamp
  override def right: Expression = endTimestamp

  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, TimestampType)
  override def dataType: DataType = LongType

  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  override def nullSafeEval(startMicros: Any, endMicros: Any): Any = {
    DateTimeUtils.timestampDiff(
      unit,
      startMicros.asInstanceOf[Long],
      endMicros.asInstanceOf[Long],
      zoneIdInEval)
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (s, e) =>
      s"""$dtu.timestampDiff("$unit", $s, $e, $zid)""")
  }

  override def prettyName: String = "timestampdiff"

  override def sql: String = {
    val childrenSQL = (unit +: children.map(_.sql)).mkString(", ")
    s"$prettyName($childrenSQL)"
  }

  override protected def withNewChildrenInternal(
      newLeft: Expression,
      newRight: Expression): TimestampDiff = {
    copy(startTimestamp = newLeft, endTimestamp = newRight)
  }
}
相关推荐
消失的旧时光-19434 小时前
第十六课实战:分布式锁与限流设计 —— 从原理到可跑 Demo
redis·分布式·缓存
若水不如远方4 小时前
分布式一致性(三):共识的黎明——Quorum 机制与 Basic Paxos
分布式·后端·算法
py小王子4 小时前
dy评论数据爬取实战:基于DrissionPage的自动化采集方案
大数据·开发语言·python·毕业设计
龙山云仓5 小时前
MES系统超融合架构
大数据·数据库·人工智能·sql·机器学习·架构·全文检索
会算数的⑨5 小时前
Kafka知识点问题驱动式的回顾与复习——(一)
分布式·后端·中间件·kafka
张小凡vip5 小时前
Kafka--使用 Kafka Connect 导入/导出数据
分布式·kafka
无忧智库5 小时前
某市“十五五“知识产权大数据监管平台与全链条保护系统建设方案深度解读(WORD)
大数据·人工智能
回忆是昨天里的海5 小时前
kafka概述
分布式·kafka
知识即是力量ol6 小时前
初识 Kafka(一):分布式流平台的定义、核心优势与架构全景
java·分布式·kafka·消息队列
综合热讯6 小时前
股票融资融券交易时间限制一览与制度说明
大数据·人工智能·区块链