Spark 之 expression

## 基础二元表达式:DateDiff

复制代码
/**
 * Binary expression that yields the day count obtained by subtracting `startDate`
 * from `endDate`.
 *
 * Both children are declared as `DateType` inputs (via `ImplicitCastInputTypes`) and the
 * result type is `IntegerType`. The result is negative when `endDate` is earlier than
 * `startDate`, as shown by the second example in the description below.
 *
 * @param endDate   the date being subtracted from; exposed as the left child
 * @param startDate the date that is subtracted; exposed as the right child
 */
@ExpressionDescription(
  usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.",
  examples = """
    Examples:
      > SELECT _FUNC_('2009-07-31', '2009-07-30');
       1

      > SELECT _FUNC_('2009-07-30', '2009-07-31');
       -1
  """,
  group = "datetime_funcs",
  since = "1.5.0")
case class DateDiff(endDate: Expression, startDate: Expression)
  extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {

  // Type contract: two dates in, one integer out.
  override def dataType: DataType = IntegerType
  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)

  // Child wiring: the end date is the left operand, the start date the right one.
  override def left: Expression = endDate
  override def right: Expression = startDate

  // Rebuilds this node around replacement children, keeping the left/right mapping above.
  override protected def withNewChildrenInternal(
      newLeft: Expression, newRight: Expression): DateDiff =
    copy(endDate = newLeft, startDate = newRight)

  // Interpreted path: both operands are cast to Int and subtracted (end minus start).
  override def nullSafeEval(end: Any, start: Any): Any = {
    val endValue = end.asInstanceOf[Int]
    val startValue = start.asInstanceOf[Int]
    endValue - startValue
  }

  // Codegen path: emits the same subtraction over the generated operand variables.
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    defineCodeGen(ctx, ev, (endCode, startCode) => s"$endCode - $startCode")
}
允许有默认参数的表达式
复制代码
/**
 * Ternary expression that converts a timestamp without time zone from one time zone
 * to another by delegating to `DateTimeUtils.convertTimestampNtzToAnotherTz`.
 *
 * The two-argument auxiliary constructor lets callers omit the source zone; in that
 * case `CurrentTimeZone()` is supplied as the first child.
 *
 * @param sourceTz expression for the time zone of the input timestamp (first child)
 * @param targetTz expression for the time zone to convert to (second child)
 * @param sourceTs expression for the timestamp without time zone (third child)
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_([sourceTz, ]targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ",
  arguments = """
    Arguments:
      * sourceTz - the time zone for the input timestamp.
                   If it is missed, the current session time zone is used as the source time zone.
      * targetTz - the time zone to which the input timestamp should be converted
      * sourceTs - a timestamp without time zone
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');
       2021-12-05 15:00:00
      > SELECT _FUNC_('Europe/Brussels', timestamp_ntz'2021-12-05 15:00:00');
       2021-12-06 00:00:00
  """,
  group = "datetime_funcs",
  since = "3.4.0")
// scalastyle:on line.size.limit
case class ConvertTimezone(
    sourceTz: Expression,
    targetTz: Expression,
    sourceTs: Expression)
  extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant {

  // Default-argument form: only the target zone and the timestamp are given, so the
  // source zone falls back to CurrentTimeZone().
  def this(targetTz: Expression, sourceTs: Expression) =
    this(CurrentTimeZone(), targetTz, sourceTs)

  override def prettyName: String = "convert_timezone"

  // Type contract: two strings (zones) and a TIMESTAMP_NTZ in, a TIMESTAMP_NTZ out.
  override def dataType: DataType = TimestampNTZType
  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, TimestampNTZType)

  // Child wiring, in constructor order.
  override def first: Expression = sourceTz
  override def second: Expression = targetTz
  override def third: Expression = sourceTs

  // Rebuilds this node around replacement children, keeping the positional mapping above.
  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): ConvertTimezone =
    copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird)

  // Interpreted path: unwrap the two UTF8String zone names and the Long timestamp value,
  // then hand them to the shared conversion routine.
  override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = {
    val fromZone = srcTz.asInstanceOf[UTF8String].toString
    val toZone = tgtTz.asInstanceOf[UTF8String].toString
    val tsValue = micros.asInstanceOf[Long]
    DateTimeUtils.convertTimestampNtzToAnotherTz(fromZone, toZone, tsValue)
  }

  // Codegen path: emits a call to the same routine. The trailing "$" is stripped from
  // the Scala object's class name before it is used as the call target in generated code.
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val utilsClass = DateTimeUtils.getClass.getName.stripSuffix("$")
    defineCodeGen(ctx, ev, (fromCode, toCode, tsCode) =>
      s"""$utilsClass.convertTimestampNtzToAnotherTz($fromCode.toString(), $toCode.toString(), $tsCode)""")
  }
}
时间函数
复制代码
  /**
   * Computes the difference between two timestamps, expressed in the requested unit.
   *
   * The unit name is upper-cased with `Locale.ROOT` before it is looked up in
   * `timestampDiffMap`, so the lookup is case insensitive. Both timestamps are turned
   * into local date-times at `zoneId` via `getLocalDateTime` before the unit-specific
   * difference function is applied to them.
   *
   * @param unit The name of the unit in which the difference is expressed.
   * @param startTs The timestamp that is subtracted from `endTs`.
   * @param endTs The timestamp from which `startTs` is subtracted.
   * @param zoneId The time zone ID at which the operation is performed.
   * @return The span between the two timestamps in the given unit.
   * @throws IllegalStateException if the upper-cased unit is not a key of `timestampDiffMap`.
   */
  def timestampDiff(unit: String, startTs: Long, endTs: Long, zoneId: ZoneId): Long = {
    val normalizedUnit = unit.toUpperCase(Locale.ROOT)
    // Guard clause: reject unknown units before doing any date-time conversion.
    // The message reports the unit as the caller wrote it, not the upper-cased form.
    if (!timestampDiffMap.contains(normalizedUnit)) {
      throw new IllegalStateException(s"Got the unexpected unit '$unit'.")
    }
    val localStart = getLocalDateTime(startTs, zoneId)
    val localEnd = getLocalDateTime(endTs, zoneId)
    timestampDiffMap(normalizedUnit)(localStart, localEnd)
  }
TimestampDiff

旧版写法:unit 以 Expression 形式传入

复制代码
/**
 * Ternary, time-zone-aware expression that returns the difference between two timestamps
 * in a given unit by delegating to `DateTimeUtils.timestampDiff`.
 *
 * In this variant the unit is itself a child expression (declared as `StringType`), so
 * it is evaluated per row together with the two timestamps.
 *
 * @param unit           expression producing the unit name (first child)
 * @param startTimestamp expression for the timestamp that is subtracted (second child)
 * @param endTimestamp   expression for the timestamp subtracted from (third child)
 * @param timeZoneId     optional time zone ID; set through `withTimeZone`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_('MONTH', timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: Expression,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends TernaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  // Three-argument form without a time zone. Despite their names, `quantity` is passed
  // on as `startTimestamp` and `timestamp` as `endTimestamp`.
  def this(unit: Expression, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def prettyName: String = "timestampdiff"

  // Type contract: a string unit plus two timestamps in, a Long out.
  override def dataType: DataType = LongType
  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType, TimestampType)

  // Child wiring, in constructor order.
  override def first: Expression = unit
  override def second: Expression = startTimestamp
  override def third: Expression = endTimestamp

  // Returns a copy of this expression bound to the given time zone ID.
  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  // Rebuilds this node around replacement children; `timeZoneId` is carried over by copy.
  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): TimestampDiff =
    copy(unit = newFirst, startTimestamp = newSecond, endTimestamp = newThird)

  // Zone used for evaluation, resolved lazily from the data type of the end timestamp.
  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  // Interpreted path: unwrap the UTF8String unit and the two Long timestamp values,
  // then delegate to the shared implementation.
  override def nullSafeEval(u: Any, startMicros: Any, endMicros: Any): Any = {
    val unitName = u.asInstanceOf[UTF8String].toString
    val startValue = startMicros.asInstanceOf[Long]
    val endValue = endMicros.asInstanceOf[Long]
    DateTimeUtils.timestampDiff(unitName, startValue, endValue, zoneIdInEval)
  }

  // Codegen path: the zone is registered as a reference object and the generated code
  // calls the same shared implementation with the unit converted via toString().
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val utilsClass = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zoneRef = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (unitCode, startCode, endCode) =>
      s"""$utilsClass.timestampDiff($unitCode.toString(), $startCode, $endCode, $zoneRef)""")
  }
}

新版写法:unit 以 String 形式传入

复制代码
/**
 * Binary, time-zone-aware expression that returns the difference between two timestamps
 * in a given unit by delegating to `DateTimeUtils.timestampDiff`.
 *
 * In this variant the unit is a plain `String` constructor argument rather than a child
 * expression, so only the two timestamps are children and `sql` is overridden to print
 * the unit in front of them.
 *
 * @param unit           the unit name, passed through unchanged to `DateTimeUtils.timestampDiff`
 * @param startTimestamp expression for the timestamp that is subtracted (left child)
 * @param endTimestamp   expression for the timestamp subtracted from (right child)
 * @param timeZoneId     optional time zone ID; set through `withTimeZone`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_(HOUR, timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_(MONTH, timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: String,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends BinaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  // Three-argument form without a time zone. Despite their names, `quantity` is passed
  // on as `startTimestamp` and `timestamp` as `endTimestamp`.
  def this(unit: String, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def prettyName: String = "timestampdiff"

  // Type contract: two timestamps in, a Long out. The unit is not a child, so it does
  // not appear in inputTypes.
  override def dataType: DataType = LongType
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, TimestampType)

  // Child wiring: start on the left, end on the right.
  override def left: Expression = startTimestamp
  override def right: Expression = endTimestamp

  // Returns a copy of this expression bound to the given time zone ID.
  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  // Rebuilds this node around replacement children; `unit` and `timeZoneId` are
  // carried over by copy.
  override protected def withNewChildrenInternal(
      newLeft: Expression,
      newRight: Expression): TimestampDiff =
    copy(startTimestamp = newLeft, endTimestamp = newRight)

  // Zone used for evaluation, resolved lazily from the data type of the end timestamp.
  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  // Interpreted path: unwrap the two Long timestamp values and delegate, passing the
  // constructor-supplied unit as is.
  override def nullSafeEval(startMicros: Any, endMicros: Any): Any = {
    val startValue = startMicros.asInstanceOf[Long]
    val endValue = endMicros.asInstanceOf[Long]
    DateTimeUtils.timestampDiff(unit, startValue, endValue, zoneIdInEval)
  }

  // Codegen path: the zone is registered as a reference object and the unit is written
  // into the generated source as a string literal.
  // NOTE(review): `unit` is spliced between double quotes without escaping; this assumes
  // it never contains quote or backslash characters. TODO confirm callers guarantee that.
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val utilsClass = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zoneRef = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (startCode, endCode) =>
      s"""$utilsClass.timestampDiff("$unit", $startCode, $endCode, $zoneRef)""")
  }

  // SQL rendering: the function name followed by the unit and the children's SQL,
  // comma-separated inside parentheses.
  override def sql: String = {
    val renderedArgs = unit +: children.map(_.sql)
    prettyName + renderedArgs.mkString("(", ", ", ")")
  }
}
相关推荐
摩羯座-185690305941 小时前
Python数据可视化基础:使用Matplotlib绘制图表
大数据·python·信息可视化·matplotlib
在未来等你1 小时前
Kafka面试精讲 Day 13:故障检测与自动恢复
大数据·分布式·面试·kafka·消息队列
jiedaodezhuti1 小时前
Flink通讯超时问题深度解析:Akka AskTimeoutException解决方案
大数据·flink
庄小焱1 小时前
大数据存储域——Kafka实战经验总结
大数据·kafka·大数据存储域
cui_win2 小时前
基于Golang + vue3 开发的 kafka 多集群管理
分布式·kafka
iiYcyk2 小时前
kafka特性和原理
分布式·kafka
zskj_qcxjqr3 小时前
告别传统繁琐!七彩喜艾灸机器人:一键开启智能养生新时代
大数据·人工智能·科技·机器人
每日新鲜事3 小时前
Saucony索康尼推出全新 WOOOLLY 运动生活羊毛系列 生动无理由,从专业跑步延展运动生活的每一刻
大数据·人工智能
在未来等你4 小时前
Kafka面试精讲 Day 15:跨数据中心复制与灾备
大数据·分布式·面试·kafka·消息队列
计算机编程-吉哥5 小时前
大数据毕业设计-基于Python的中文起点网小说数据分析平台(高分计算机毕业设计选题·定制开发·真正大数据)
大数据·hadoop·计算机毕业设计选题·机器学习毕业设计·大数据毕业设计·大数据毕业设计选题推荐·大数据毕设项目