FlinkSQL Tips

FlinkSQL使用小技巧总结

1. 分组聚合

sql 复制代码
-- Demo data: five (id, name, age) rows that all share id = 1; 'lisi1' appears twice.
-- Column names are taken from the first branch of the UNION ALL.
CREATE VIEW t1 AS
SELECT 1 AS id, 'lisi1' AS name, 12 AS age
UNION ALL
SELECT 1, 'lisi2', 11
UNION ALL
SELECT 1, 'lisi3', 15
UNION ALL
SELECT 1, 'lisi4', 13
UNION ALL
SELECT 1, 'lisi1', 10;

-- Three ways of folding the names of one group into a single value:
--   c1: COLLECT gathers the names into a multiset (name -> occurrence count)
--   c2: "name=age" pairs are joined by LISTAGG and parsed back by STR_TO_MAP;
--       both calls rely on their default delimiters (none are passed here)
--   c3: LISTAGG joins the raw names into one string
SELECT
    id,
    COLLECT(name) AS c1,
    STR_TO_MAP(LISTAGG(CONCAT(name, '=', CAST(age AS STRING)))) AS c2,
    LISTAGG(name) AS c3
FROM t1
GROUP BY id;

--结果:
c1 {lisi4=1, lisi3=1, lisi2=1, lisi1=2}
c2 {lisi4=13, lisi3=15, lisi2=11, lisi1=10}
c3  lisi1,lisi2,lisi3,lisi4,lisi1

2.设置时区

sql 复制代码
-- Sets the table.local-time-zone option to Asia/Shanghai, then selects NOW() and its type.
-- NOTE(review): the two SET lines are the quoted and unquoted spellings of the same option;
-- which spelling is accepted may depend on the Flink version / SQL client in use -- confirm.
SET 'table.local-time-zone' = 'Asia/Shanghai';
SET table.local-time-zone = Asia/Shanghai; -- the form marked as correct by the original author
select now(),typeof(now())

3.时间比较&时间格式化

sql 复制代码
-- One-row view holding a TIMESTAMP column and two DATE_FORMAT results.
-- DATE_FORMAT(timestamp/string, format) returns a STRING, not a TIMESTAMP.
create view t1 as
select
    CAST('2023-12-14 20:41:02' AS TIMESTAMP(0)) AS create_time1,
    DATE_FORMAT(CURRENT_TIMESTAMP - INTERVAL '30' DAY, 'yyyy-MM-dd HH:mm:ss') as create_time2,
    DATE_FORMAT('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS') as create_time3;

-- NOTE(review): create_time1 is a TIMESTAMP while create_time2 is a STRING, so this
-- comparison relies on implicit type coercion -- confirm it behaves as expected on your Flink version.
select create_time1, create_time2, create_time1 >= create_time2 as flag from t1;
--结果:
create_time1: 2023-12-14 20:41:02.0
create_time2:2023-11-14 20:46:39
flag: true


-- Comparing times held as strings: both operands are plain string literals in
-- 'yyyy-MM-dd HH:mm:ss' form, and the comparison is done on the strings themselves.
select '2023-11-25 00:00:01' >= '2023-11-25 00:00:00';
--结果:
true

4.时间函数取值用法

sql 复制代码
-- Selects every current-date/time function side by side so their outputs can be compared.
SELECT
    now(),
    CURRENT_TIMESTAMP,
    CURRENT_TIME,
    CURRENT_DATE,
    LOCALTIMESTAMP,
    LOCALTIME,
    CURRENT_ROW_TIMESTAMP(),
    UNIX_TIMESTAMP()

--结果:
now(): 2023-12-14T12:56:58.293Z
CURRENT_TIMESTAMP: 2023-12-14T12:56:58.293Z
CURRENT_TIME: 20:56:58.293
CURRENT_DATE: 2023-12-14
LOCALTIMESTAMP: 2023-12-14 20:56:58.293
LOCALTIME: 20:56:58.293
CURRENT_ROW_TIMESTAMP(): 2023-12-14T12:57:02.113Z
UNIX_TIMESTAMP(): 1702558622

5.判断字符串不为空的巧妙写法

sql 复制代码
-- TRUE only when col1 is neither NULL nor the empty string: COALESCE first maps NULL to ''.
-- col1 is a placeholder; add a FROM clause for the table that owns the column.
select COALESCE(col1,'') <> ''

6. last_value:返回一组有序值中的最后一个值。(FIRST_VALUE:返回一组有序值中的第一个值)

sql 复制代码
-- Demo data: five (id, name, age) rows sharing id = 1, listed in the order the result depends on.
-- Column names are taken from the first branch of the UNION ALL.
CREATE VIEW t1 AS
SELECT 1 AS id, 'lisi1' AS name, 12 AS age
UNION ALL
SELECT 1, 'lisi2', 11
UNION ALL
SELECT 1, 'lisi1', 10
UNION ALL
SELECT 1, 'lisi3', 15
UNION ALL
SELECT 1, 'lisi4', 13
;

-- Per id: last and first age seen, next to the largest and smallest age.
SELECT
    id,
    LAST_VALUE(age),
    FIRST_VALUE(age),
    MAX(age),
    MIN(age)
FROM t1
GROUP BY id
--结果
last_value(age): 13 --最后一个
first_value(age):12 --第一个
max(age): 15 --最大一个
min(age): 10 --最小一个

7. long毫秒时间戳转时间,TO_TIMESTAMP_LTZ/TO_TIMESTAMP用法

sql 复制代码
SELECT
    -- BIGINT epoch milliseconds -> TIMESTAMP_LTZ(3), then cast to TIMESTAMP(3)
    CAST(TO_TIMESTAMP_LTZ(1688435411000, 3) AS TIMESTAMP(3)),
    -- BIGINT epoch milliseconds -> TIMESTAMP_LTZ(3)
    TO_TIMESTAMP_LTZ(1688435411000, 3),
    -- string parsed with the given pattern -> TIMESTAMP(3)
    TO_TIMESTAMP('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS')

--结果:
cast(TO_TIMESTAMP_LTZ(1688435411000,3) as TIMESTAMP(3)): 2023-07-04 09:50:11.0
TO_TIMESTAMP_LTZ(1688435411000,3): 2023-07-04T01:50:11Z
TO_TIMESTAMP('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS'): 2023-12-18 21:10:12.234

8. 创建临时数据demo

sql 复制代码
-- Demo fixture: eleven rows for symbol 'ACME', one per second, built inline from a VALUES list.
CREATE VIEW tmp AS
SELECT
    symbol,
    rowtime,
    price,
    tax
FROM (
    VALUES
        ('ACME', TIMESTAMP '2024-01-11 10:00:00', 12, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:01', 17, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:02', 19, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:03', 21, 3),
        ('ACME', TIMESTAMP '2024-01-11 10:00:04', 25, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:05', 18, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:06', 15, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:07', 14, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:08', 24, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:09', 25, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:10', 19, 1)
) AS ticker (symbol, rowtime, price, tax);

-- Show the fixture; the view exposes exactly these four columns in this order.
SELECT symbol, rowtime, price, tax FROM tmp

9. 配置 MiniBatch 聚合和 Local-Global 聚合

参考: MiniBatch 聚合

sql 复制代码
-- MiniBatch aggregation: enable mini-batching and set its latency and size options.
SET table.exec.mini-batch.enabled = true;
SET table.exec.mini-batch.allow-latency = 1s;
SET table.exec.mini-batch.size = 10000;
-- Local-Global aggregation: select the TWO_PHASE aggregation strategy.
SET table.optimizer.agg-phase-strategy = TWO_PHASE;

10. 设置状态保留时间(TTL):在涉及 group by、join 等需要保存状态的操作时使用

sql 复制代码
-- State TTL: the two active lines set the same option to alternative values (24 hours vs. 30 days).
-- NOTE(review): if both are executed, the later value presumably applies -- keep only the one you need.
SET table.exec.state.ttl=86400s; -- 24 hours; default: 0 ms
-- SET 'table.exec.state.ttl'='86400s'; -- 24 hours; default: 0 ms; quoted form, marked as wrong by the original author
SET table.exec.state.ttl=2592000s; -- 30 days; default: 0 ms
相关推荐
路由侠内网穿透8 分钟前
本地部署开源 LLM 应用观测与调试平台 Langfuse 并实现外部访问
运维·服务器·数据库·物联网·开源
SPC的存折8 分钟前
1、Ansible之Ansible安装与入门
linux·数据库·ansible
冬夜戏雪8 分钟前
实习面经(十二)
java
qiumingxun13 分钟前
mysql的分区表
数据库·mysql
金融小师妹14 分钟前
基于AI航运与能源数据模型的极端收缩分析:霍尔木兹海峡从2000万桶到130万桶的结构性断层
大数据·深度学习·svn·能源
sxhcwgcy14 分钟前
Spring Boot中集成MyBatis操作数据库详细教程
数据库·spring boot·mybatis
ws20190715 分钟前
花城聚智:2026广州新能源汽车技术与热管理展为何成产业升级关键节点?
大数据·人工智能·科技·物联网·汽车
康康的AI博客15 分钟前
向量数据库选型指南:AI 数据底座怎么选不踩坑
数据库·人工智能
编码忘我17 分钟前
JVM 运行时数据区详解
java·后端·程序员
阿唯不困18 分钟前
AI智能应用开发(Java)从起点到终点-面向对象
java·后端