Spark 之 expression

##.

复制代码
/**
 * Returns the number of days from startDate to endDate.
 */
@ExpressionDescription(
  usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.",
  examples = """
    Examples:
      > SELECT _FUNC_('2009-07-31', '2009-07-30');
       1

      > SELECT _FUNC_('2009-07-30', '2009-07-31');
       -1
  """,
  group = "datetime_funcs",
  since = "1.5.0")
case class DateDiff(endDate: Expression, startDate: Expression)
  extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {

  override def left: Expression = endDate
  override def right: Expression = startDate
  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)
  override def dataType: DataType = IntegerType

  override def nullSafeEval(end: Any, start: Any): Any = {
    end.asInstanceOf[Int] - start.asInstanceOf[Int]
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    defineCodeGen(ctx, ev, (end, start) => s"$end - $start")
  }

  override protected def withNewChildrenInternal(
      newLeft: Expression, newRight: Expression): DateDiff =
    copy(endDate = newLeft, startDate = newRight)
}
允许有默认参数的表达式
复制代码
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_([sourceTz, ]targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ",
  arguments = """
    Arguments:
      * sourceTz - the time zone for the input timestamp.
                   If it is missed, the current session time zone is used as the source time zone.
      * targetTz - the time zone to which the input timestamp should be converted
      * sourceTs - a timestamp without time zone
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');
       2021-12-05 15:00:00
      > SELECT _FUNC_('Europe/Brussels', timestamp_ntz'2021-12-05 15:00:00');
       2021-12-06 00:00:00
  """,
  group = "datetime_funcs",
  since = "3.4.0")
// scalastyle:on line.size.limit
case class ConvertTimezone(
    sourceTz: Expression,
    targetTz: Expression,
    sourceTs: Expression)
  extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant {

  def this(targetTz: Expression, sourceTs: Expression) =
    this(CurrentTimeZone(), targetTz, sourceTs)

  override def first: Expression = sourceTz
  override def second: Expression = targetTz
  override def third: Expression = sourceTs

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, TimestampNTZType)
  override def dataType: DataType = TimestampNTZType

  override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = {
    DateTimeUtils.convertTimestampNtzToAnotherTz(
      srcTz.asInstanceOf[UTF8String].toString,
      tgtTz.asInstanceOf[UTF8String].toString,
      micros.asInstanceOf[Long])
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    defineCodeGen(ctx, ev, (srcTz, tgtTz, micros) =>
      s"""$dtu.convertTimestampNtzToAnotherTz($srcTz.toString(), $tgtTz.toString(), $micros)""")
  }

  override def prettyName: String = "convert_timezone"

  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): ConvertTimezone = {
    copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird)
  }
}
时间函数
复制代码
  /**
   * Gets the difference between two timestamps.
   *
   * @param unit Specifies the interval units in which to express the difference between
   *             the two timestamp parameters.
   * @param startTs A timestamp which the function subtracts from `endTs`.
   * @param endTs A timestamp from which the function subtracts `startTs`.
   * @param zoneId The time zone ID at which the operation is performed.
   * @return The time span between two timestamp values, in the units specified.
   */
  def timestampDiff(unit: String, startTs: Long, endTs: Long, zoneId: ZoneId): Long = {
    val unitInUpperCase = unit.toUpperCase(Locale.ROOT)
    if (timestampDiffMap.contains(unitInUpperCase)) {
      val startLocalTs = getLocalDateTime(startTs, zoneId)
      val endLocalTs = getLocalDateTime(endTs, zoneId)
      timestampDiffMap(unitInUpperCase)(startLocalTs, endLocalTs)
    } else {
      throw new IllegalStateException(s"Got the unexpected unit '$unit'.")
    }
  }
TimestampDiff

旧版表达式写法

复制代码
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_('MONTH', timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: Expression,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends TernaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  def this(unit: Expression, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def first: Expression = unit
  override def second: Expression = startTimestamp
  override def third: Expression = endTimestamp

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType, TimestampType)
  override def dataType: DataType = LongType

  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  override def nullSafeEval(u: Any, startMicros: Any, endMicros: Any): Any = {
    DateTimeUtils.timestampDiff(
      u.asInstanceOf[UTF8String].toString,
      startMicros.asInstanceOf[Long],
      endMicros.asInstanceOf[Long],
      zoneIdInEval)
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (u, s, e) =>
      s"""$dtu.timestampDiff($u.toString(), $s, $e, $zid)""")
  }

  override def prettyName: String = "timestampdiff"

  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): TimestampDiff = {
    copy(unit = newFirst, startTimestamp = newSecond, endTimestamp = newThird)
  }
}

新版 String 传参数 写法

复制代码
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_(HOUR, timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_(MONTH, timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: String,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends BinaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  def this(unit: String, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def left: Expression = startTimestamp
  override def right: Expression = endTimestamp

  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, TimestampType)
  override def dataType: DataType = LongType

  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  override def nullSafeEval(startMicros: Any, endMicros: Any): Any = {
    DateTimeUtils.timestampDiff(
      unit,
      startMicros.asInstanceOf[Long],
      endMicros.asInstanceOf[Long],
      zoneIdInEval)
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (s, e) =>
      s"""$dtu.timestampDiff("$unit", $s, $e, $zid)""")
  }

  override def prettyName: String = "timestampdiff"

  override def sql: String = {
    val childrenSQL = (unit +: children.map(_.sql)).mkString(", ")
    s"$prettyName($childrenSQL)"
  }

  override protected def withNewChildrenInternal(
      newLeft: Expression,
      newRight: Expression): TimestampDiff = {
    copy(startTimestamp = newLeft, endTimestamp = newRight)
  }
}
相关推荐
想ai抽10 分钟前
Flink的checkpoint interval与mini-batch什么区别?
大数据·flink·batch
鸽鸽程序猿11 分钟前
【RabbitMQ】简介
分布式·rabbitmq
字节跳动数据平台13 分钟前
火山引擎推出Data Agent评测体系,并发布《2025数据智能体实践指南》
大数据
字节跳动数据平台14 分钟前
火山引擎发布新产品用户研究Agent,并推出数据智能体评测体系
大数据
在未来等你18 分钟前
Kafka面试精讲 Day 29:版本升级与平滑迁移
大数据·分布式·面试·kafka·消息队列
雾行灯34 分钟前
Hive 中的“分布键”之思:从数据组织到查询优化的系统解析
大数据
在未来等你1 小时前
Kafka面试精讲 Day 30:Kafka面试真题解析与答题技巧
大数据·分布式·面试·kafka·消息队列
Lx3521 小时前
Flink内存管理:如何避免`OutOfMemoryError`
大数据
TDengine (老段)1 小时前
TDengine 数字函数 RADIANS 用户手册
大数据·数据库·sql·物联网·时序数据库·tdengine·涛思数据
流烟默2 小时前
机器学习中一些场景的模型评估与理解图表
大数据·人工智能·机器学习