FlinkSQL使用小技巧总结
1. 分组聚合
sql
-- Demo data: five rows that all share id = 1; the name 'lisi1' appears twice.
CREATE VIEW t1 AS
SELECT 1 AS id, 'lisi1' AS name, 12 AS age
UNION ALL
SELECT 1 AS id, 'lisi2' AS name, 11 AS age
UNION ALL
SELECT 1 AS id, 'lisi3' AS name, 15 AS age
UNION ALL
SELECT 1 AS id, 'lisi4' AS name, 13 AS age
UNION ALL
SELECT 1 AS id, 'lisi1' AS name, 10 AS age;

-- Three ways to fold the rows of one group into a single value.
SELECT
    id,
    -- c1: multiset of names (name -> number of occurrences)
    COLLECT(name) AS c1,
    -- c2: map of name -> age, parsed from the aggregated "name=age,..." string
    STR_TO_MAP(
        LISTAGG(CONCAT(name, '=', CAST(age AS STRING)))
    ) AS c2,
    -- c3: all names joined into one comma-separated string
    LISTAGG(name) AS c3
FROM t1
GROUP BY id;
--结果:
c1 {lisi4=1, lisi3=1, lisi2=1, lisi1=2}
c2 {lisi4=13, lisi3=15, lisi2=11, lisi1=10}
c3 lisi1,lisi2,lisi3,lisi4,lisi1
2.设置时区
sql
-- Session time zone, written in two forms: quoted key/value first, unquoted second.
-- NOTE(review): the Flink SQL Client docs (1.13+) show the quoted form, while the
-- author marks the unquoted form as the correct one — confirm which form your
-- platform/version accepts.
SET 'table.local-time-zone' = 'Asia/Shanghai';
SET table.local-time-zone = Asia/Shanghai; -- correct form (per the original author)
-- Print the current time together with its data type.
select now(),typeof(now())
3.时间比较&时间格式化
sql
-- One-row view with a TIMESTAMP column and two formatted-time columns.
create view t1 as
select
    CAST('2023-12-14 20:41:02' AS TIMESTAMP(0)) AS create_time1,
    DATE_FORMAT(CURRENT_TIMESTAMP - INTERVAL '30' DAY, 'yyyy-MM-dd HH:mm:ss') as create_time2,
    DATE_FORMAT('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS') as create_time3;
-- DATE_FORMAT(timestamp/string, string) returns a STRING, not a TIMESTAMP.
-- The comparison below is therefore TIMESTAMP >= STRING.
-- NOTE(review): this presumably relies on implicit type coercion of the string
-- operand — confirm, or cast create_time2 explicitly.
select create_time1, create_time2, create_time1 >= create_time2 as flag from t1;
--结果:
create_time1: 2023-12-14 20:41:02.0
create_time2:2023-11-14 20:46:39
flag: true
-- Comparing time strings directly. This is a plain string comparison, so it only
-- orders correctly when both sides use the same zero-padded layout
-- (here 'yyyy-MM-dd HH:mm:ss').
select '2023-11-25 00:00:01' >= '2023-11-25 00:00:00';
--结果:
true
4.时间函数取值用法
sql
-- Side-by-side output of the built-in "current time" functions.
SELECT
    NOW(),
    CURRENT_TIMESTAMP,
    CURRENT_TIME,
    CURRENT_DATE,
    LOCALTIMESTAMP,
    LOCALTIME,
    CURRENT_ROW_TIMESTAMP(),
    UNIX_TIMESTAMP()
--结果:
now(): 2023-12-14T12:56:58.293Z
CURRENT_TIMESTAMP: 2023-12-14T12:56:58.293Z
CURRENT_TIME: 20:56:58.293
CURRENT_DATE: 2023-12-14
LOCALTIMESTAMP: 2023-12-14 20:56:58.293
LOCALTIME: 20:56:58.293
CURRENT_ROW_TIMESTAMP(): 2023-12-14T12:57:02.113Z
UNIX_TIMESTAMP(): 1702558622
5.判断字符串不为空的巧妙写法
sql
-- TRUE only when col1 is neither NULL nor the empty string:
-- COALESCE maps NULL to '', so a single <> '' test covers both cases.
select COALESCE(col1,'') <> ''
6. last_value:返回一组有序值中的最后一个值。(FIRST_VALUE:返回一组有序值中的第一个值)
sql
-- Demo data: five rows in a single group (id = 1).
CREATE VIEW t1 AS
SELECT 1 AS id, 'lisi1' AS name, 12 AS age
UNION ALL
SELECT 1 AS id, 'lisi2' AS name, 11 AS age
UNION ALL
SELECT 1 AS id, 'lisi1' AS name, 10 AS age
UNION ALL
SELECT 1 AS id, 'lisi3' AS name, 15 AS age
UNION ALL
SELECT 1 AS id, 'lisi4' AS name, 13 AS age;

-- Compare LAST_VALUE / FIRST_VALUE with MAX / MIN over the same group.
-- NOTE(review): there is no ORDER BY, so first/last presumably follow the order
-- in which rows arrive — confirm before relying on it.
SELECT
    id,
    LAST_VALUE(age),
    FIRST_VALUE(age),
    MAX(age),
    MIN(age)
FROM t1
GROUP BY id
--结果
last_value(age): 13 --最后一个
first_value(age):12 --第一个
max(age): 15 --最大一个
min(age): 10 --最小一个
7. long毫秒时间戳转时间,TO_TIMESTAMP_LTZ/TO_TIMESTAMP用法
sql
-- Convert a long epoch-millisecond value, or a formatted string, to timestamp types.
SELECT
    -- long epoch millis -> TIMESTAMP(3)
    CAST(TO_TIMESTAMP_LTZ(1688435411000, 3) AS TIMESTAMP(3)),
    -- long epoch millis -> TIMESTAMP_LTZ(3)
    TO_TIMESTAMP_LTZ(1688435411000, 3),
    -- string parsed with the given pattern -> TIMESTAMP(3)
    TO_TIMESTAMP('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS')
--结果:
cast(TO_TIMESTAMP_LTZ(1688435411000,3) as TIMESTAMP(3)): 2023-07-04 09:50:11.0
TO_TIMESTAMP_LTZ(1688435411000,3): 2023-07-04T01:50:11Z
TO_TIMESTAMP('2023-12-18 21:10:12.234', 'yyyy-MM-dd HH:mm:ss.SSS'): 2023-12-18 21:10:12.234
8. 创建临时数据demo
sql
-- Throwaway demo view built from inline VALUES rows, so no source table is needed.
CREATE VIEW tmp AS
SELECT
    symbol,
    rowtime,
    price,
    tax
FROM (
    VALUES
        ('ACME', TIMESTAMP '2024-01-11 10:00:00', 12, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:01', 17, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:02', 19, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:03', 21, 3),
        ('ACME', TIMESTAMP '2024-01-11 10:00:04', 25, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:05', 18, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:06', 15, 1),
        ('ACME', TIMESTAMP '2024-01-11 10:00:07', 14, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:08', 24, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:09', 25, 2),
        ('ACME', TIMESTAMP '2024-01-11 10:00:10', 19, 1)
) AS Ticker (symbol, rowtime, price, tax);

-- Read the demo rows back.
SELECT
    symbol,
    rowtime,
    price,
    tax
FROM tmp
9. 配置 MiniBatch 聚合和 Local-Global 聚合
参考: MiniBatch 聚合
sql
-- MiniBatch aggregation: enable it, with a 1s allowed latency and a batch size of 10000.
SET table.exec.mini-batch.enabled = true;
SET table.exec.mini-batch.allow-latency = 1s;
SET table.exec.mini-batch.size = 10000;
-- Local-Global aggregation: select the TWO_PHASE aggregation strategy.
-- NOTE(review): per the Flink tuning docs this optimization presumably requires
-- mini-batch to be enabled as well — confirm.
SET table.optimizer.agg-phase-strategy = TWO_PHASE;
10. 设置状态 TTL:在涉及 group by、join 等需要保存状态的操作时使用
sql
-- State TTL examples. Both active lines set the same key, so they are
-- alternatives: if both are run, the later value presumably wins — keep one.
SET table.exec.state.ttl=86400s; -- 24 hours; default: 0 ms
-- Quoted form, marked by the original author as the wrong form (kept commented out):
--SET 'table.exec.state.ttl'='86400s'; -- 24 hours; default: 0 ms; wrong form
SET table.exec.state.ttl=2592000s; -- 30 days; default: 0 ms