FlinkSQL Tips

FlinkSQL使用小技巧总结

1. 分组聚合

sql 复制代码
-- Demo data: five rows that all share id = 1; the name 'lisi1' occurs twice (ages 12 and 10).
CREATE VIEW t1 AS
SELECT 1 AS id, 'lisi1' AS name, 12 AS age
UNION ALL
SELECT 1 AS id, 'lisi2' AS name, 11 AS age
UNION ALL
SELECT 1 AS id, 'lisi3' AS name, 15 AS age
UNION ALL
SELECT 1 AS id, 'lisi4' AS name, 13 AS age
UNION ALL
SELECT 1 AS id, 'lisi1' AS name, 10 AS age;

-- Three ways of folding all rows of one id group into a single value.
SELECT
    id,
    -- c1: multiset of names with their occurrence counts (the sample output shows lisi1=2)
    COLLECT(name) AS c1,
    -- c2: join "name=age" pairs into one string, then parse that string into a map
    --     (in the sample output the duplicated key lisi1 ends up with the later age, 10)
    STR_TO_MAP(LISTAGG(CONCAT(name, '=', CAST(age AS STRING)))) AS c2,
    -- c3: all names joined into one comma-separated string
    LISTAGG(name) AS c3
FROM t1
GROUP BY id;

--结果:
c1 {lisi4=1, lisi3=1, lisi2=1, lisi1=2}
c2 {lisi4=13, lisi3=15, lisi2=11, lisi1=10}
c3  lisi1,lisi2,lisi3,lisi4,lisi1

2.设置时区

sql 复制代码
-- Set the session time zone. Both spellings from the original are kept:
--   * the quoted form is the syntax documented for the Apache Flink SQL client;
--   * the unquoted form is the one the original author marks as the correct one
--     (presumably for the SQL platform they use - verify which form your environment
--     accepts and keep only that line).
SET 'table.local-time-zone' = 'Asia/Shanghai';
SET table.local-time-zone = Asia/Shanghai;
-- Check the effect: print the current time and the data type NOW() returns.
select now(), typeof(now());

3.时间比较&时间格式化

sql 复制代码
-- NOTE: an earlier snippet in this article also creates a view named t1; when running
-- everything in one session, drop it first (DROP VIEW IF EXISTS t1;).
create view t1 as
select
    -- string literal cast to a TIMESTAMP(0)
    CAST('2023-12-14 20:41:02' AS TIMESTAMP(0)) AS create_time1,
    -- "30 days ago", formatted as text
    DATE_FORMAT(CURRENT_TIMESTAMP - INTERVAL '30' DAY, 'yyyy-MM-dd HH:mm:ss') AS create_time2,
    -- DATE_FORMAT also accepts a date-time string as its first argument
    DATE_FORMAT('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS') AS create_time3;

-- DATE_FORMAT(timestamp/string, string) returns a STRING (not a TIMESTAMP), so the
-- comparison below is TIMESTAMP(0) vs. STRING and relies on the engine converting one
-- side implicitly. CAST explicitly if your Flink version rejects the mixed comparison.
select create_time1, create_time2, create_time1 >= create_time2 as flag from t1;
--结果:
create_time1: 2023-12-14 20:41:02.0
create_time2:2023-11-14 20:46:39
flag: true


-- Comparing date-time values held as strings: strings are compared character by
-- character, which matches chronological order only when both sides use the same
-- fixed-width 'yyyy-MM-dd HH:mm:ss' layout.
select '2023-11-25 00:00:01' >= '2023-11-25 00:00:00';
--结果:
true

4.时间函数取值用法

sql 复制代码
-- The current-time functions side by side, to compare what each one returns.
select
    now(),
    CURRENT_TIMESTAMP,
    CURRENT_TIME,
    CURRENT_DATE,
    LOCALTIMESTAMP,
    LOCALTIME,
    CURRENT_ROW_TIMESTAMP(),
    UNIX_TIMESTAMP();

--结果:
now(): 2023-12-14T12:56:58.293Z
CURRENT_TIMESTAMP: 2023-12-14T12:56:58.293Z
CURRENT_TIME: 20:56:58.293
CURRENT_DATE: 2023-12-14
LOCALTIMESTAMP: 2023-12-14 20:56:58.293
LOCALTIME: 20:56:58.293
CURRENT_ROW_TIMESTAMP(): 2023-12-14T12:57:02.113Z
UNIX_TIMESTAMP(): 1702558622

5.判断字符串不为空的巧妙写法

sql 复制代码
-- TRUE only when col1 is neither NULL nor the empty string: COALESCE first maps NULL
-- to '', so a single <> '' comparison covers both cases.
-- col1 is a placeholder column; append "FROM <your table>" to run this.
select COALESCE(col1, '') <> ''

6. last_value:返回一组有序值中的最后一个值。(FIRST_VALUE:返回一组有序值中的第一个值)

sql 复制代码
-- NOTE: earlier snippets in this article also create a view named t1; when running
-- everything in one session, drop it first (DROP VIEW IF EXISTS t1;).
-- Demo data: five rows sharing id = 1, ages in row order 12, 11, 10, 15, 13.
create view t1 as
select 1 as id, 'lisi1' as name, 12 as age
UNION ALL
select 1 as id, 'lisi2' as name, 11 as age
UNION ALL
select 1 as id, 'lisi1' as name, 10 as age
UNION ALL
select 1 as id, 'lisi3' as name, 15 as age
UNION ALL
select 1 as id, 'lisi4' as name, 13 as age
;

-- Per id: last_value / first_value return the age of the last / first row of the group
-- (13 and 12 for the data above), whereas max / min return the largest / smallest age
-- (15 and 10).
select
    id,
    last_value(age),
    first_value(age),
    max(age),
    min(age)
from t1
group by id;
--结果
last_value(age): 13 --最后一个
first_value(age):12 --第一个
max(age): 15 --最大一个
min(age): 10 --最小一个

7. long毫秒时间戳转时间,TO_TIMESTAMP_LTZ/TO_TIMESTAMP用法

sql 复制代码
select
    -- epoch milliseconds (long) -> TIMESTAMP_LTZ(3) -> cast to TIMESTAMP(3)
    -- (in the sample output the cast value is shifted by +8h relative to the UTC instant)
    cast(TO_TIMESTAMP_LTZ(1688435411000, 3) as TIMESTAMP(3)),
    -- epoch milliseconds (long) -> TIMESTAMP_LTZ(3)
    TO_TIMESTAMP_LTZ(1688435411000, 3),
    -- date-time string parsed with the given pattern -> TIMESTAMP(3)
    TO_TIMESTAMP('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS');

--结果:
cast(TO_TIMESTAMP_LTZ(1688435411000,3) as TIMESTAMP(3)): 2023-07-04 09:50:11.0
TO_TIMESTAMP_LTZ(1688435411000,3): 2023-07-04T01:50:11Z
TO_TIMESTAMP('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS'): 2023-12-18 21:10:12.234

8. 创建临时数据demo

sql 复制代码
-- Build a small demo data set from literal rows, so no external table or connector is
-- needed: one symbol, one row per second, with a price and a tax column.
CREATE VIEW tmp AS
SELECT symbol, rowtime, price, tax
FROM
    (VALUES
     ('ACME', TIMESTAMP '2024-01-11 10:00:00', 12, 1)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:01', 17, 2)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:02', 19, 1)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:03', 21, 3)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:04', 25, 2)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:05', 18, 1)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:06', 15, 1)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:07', 14, 2)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:08', 24, 2)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:09', 25, 2)
    ,('ACME', TIMESTAMP '2024-01-11 10:00:10', 19, 1)
    ) AS Ticker(symbol, rowtime, price, tax);

-- Explicit column list (same columns, same order as the view) instead of SELECT *.
SELECT symbol, rowtime, price, tax
FROM tmp;

9. 配置 MiniBatch 聚合和 Local-Global 聚合

参考: MiniBatch 聚合

sql 复制代码
-- MiniBatch aggregation: turn it on, then bound each batch by latency (1s) and by size (10000)
-- NOTE(review): these SET statements use the unquoted key/value form; the Apache Flink SQL
-- client documents the quoted form SET 'key' = 'value'; - confirm which one your platform accepts
SET table.exec.mini-batch.enabled = true;
SET table.exec.mini-batch.allow-latency = 1s;
SET table.exec.mini-batch.size = 10000;
-- Local-Global aggregation: request the two-phase aggregation strategy
-- NOTE(review): per the Flink performance-tuning guide this presumably only takes effect
-- while mini-batch is enabled - confirm for your Flink version
SET table.optimizer.agg-phase-strategy = TWO_PHASE;

10. 设置状态 TTL:在涉及 group by、join 等需要保存状态的操作时使用

sql 复制代码
-- NOTE(review): the two active statements assign the same key, so they look like
-- alternative examples (24 hours vs. 30 days) - keep only the one you need
SET table.exec.state.ttl=86400s; --24 hours; default: 0 ms
--SET 'table.exec.state.ttl'='86400s'; --24 hours; default: 0 ms; marked as wrong syntax by the original author (presumably platform-specific - the Apache Flink SQL client documents this quoted form)
SET table.exec.state.ttl=2592000s; --30 days; default: 0 ms
相关推荐
禾小西1 小时前
Java 逐梦力扣之旅_[204. 计数质数]
java·算法·leetcode
快来卷java1 小时前
MySQL篇(六)MySQL 分库分表:应对数据增长挑战的有效策略
数据库·mysql·oracle
ゞ 正在缓冲99%…1 小时前
leetcode295.数据流的中位数
java·数据结构·算法·leetcode·
黄雪超2 小时前
Flink介绍——实时计算核心论文之S4论文总结
大数据·论文阅读·flink
有梦想的攻城狮3 小时前
spring-cloud-alibaba-nacos-config使用说明
java·spring·nacos·springcloud·配置中心
IT认证通关3 小时前
金仓数据库KCM认证考试介绍【2025年4月更新】
数据库
viperrrrrrrrrr73 小时前
大数据学习(96)-Hive面试题
大数据·hive·学习
程序猿阿伟3 小时前
《SQL赋能人工智能:解锁特征工程的隐秘力量》
数据库·人工智能·sql
csssnxy4 小时前
叁仟数智指路机器人是否支持远程监控和管理?
大数据·人工智能
冰箱里的金鱼4 小时前
MYSQL 存储引擎 和 日志
数据库