flinksql-Queries查询相关实战

  1. 分组聚合

--分组集

--GROUPING SETS() 允许你定义特定的分组方式,这样你可以选择只对感兴趣的分组进行计算。

--通过手动指定不同的分组组合,你能够灵活地控制数据的聚合结果。

--与 ROLLUP 和 CUBE 不同,GROUPING SETS 不会自动生成所有子集组合,而是只生成你指定的那些。

SELECT supplier_id, rating, COUNT(*) AS total

FROM (VALUES

('supplier1', 'product1', 4),

('supplier1', 'product2', 3),

('supplier2', 'product3', 3),

('supplier2', 'product4', 4))

AS Products(supplier_id, product_id, rating)

GROUP BY GROUPING SETS ((supplier_id, rating), (supplier_id), ());

--ROLLUP

--ROLLUP() 用于执行分层级别的聚合,主要用于需要按顺序逐层汇总数据的场景。

--与 CUBE() 不同,ROLLUP() 只生成按从左到右逐步减少维度的组合,而不是所有可能的子集组合。

--例如,ROLLUP(a, b, c) 会生成 (a, b, c), (a, b), (a), 和 (),而不会像 CUBE() 那样生成所有的可能组--合。

SELECT supplier_id, rating, COUNT(*)

FROM (VALUES

('supplier1', 'product1', 4),

('supplier1', 'product2', 3),

('supplier2', 'product3', 3),

('supplier2', 'product4', 4))

AS Products(supplier_id, product_id, rating)

GROUP BY ROLLUP (supplier_id, rating);

--立方体

--CUBE() 是一种扩展的 GROUP BY 操作,允许你针对多列进行分组聚合,并生成每种可能的维度组合的聚合结果。

--如果使用了 CUBE(a, b, c),Flink 会计算出所有 a, b, c 及其子集的组合的聚合结果。

--在数据分析和 OLAP(在线分析处理)场景中,CUBE 常用来计算多维数据的统计值。

SELECT supplier_id, rating, COUNT(*)

FROM (VALUES

('supplier1', 'product1', 4),

('supplier1', 'product2', 3),

('supplier2', 'product3', 3),

('supplier2', 'product4', 4))

AS Products(supplier_id, product_id, rating)

GROUP BY CUBE (supplier_id, rating);

  1. 窗口函数TVF

--注:不支持cdc模式,因为窗口函数只支持追加模式的,不支持update与delete操作

--模拟表

CREATE TABLE bid (

`id` string,

bidtime TIMESTAMP(3),

price DECIMAL(10, 2),

item string,

ts as bidtime,

WATERMARK FOR ts AS ts - INTERVAL '5' SECOND,

--proc_time AS PROCTIME(),

PRIMARY KEY (`id`) NOT ENFORCED

)

WITH

(

'connector' = 'jdbc',

${36},

'table-name' = 'bid'

);

--滚动窗口

-- SELECT cast(window_start as string) AS window_start, cast(window_end as string) AS window_end , SUM(price) AS total_price

-- FROM TABLE(

-- TUMBLE(TABLE bid, DESCRIPTOR(ts), INTERVAL '10' MINUTES))

-- GROUP BY window_start, window_end;

--滑动窗口

-- SELECT cast(window_start as string) AS window_start, cast(window_end as string) AS window_end , SUM(price) AS total_price

-- FROM TABLE(

-- HOP(TABLE bid, DESCRIPTOR(ts), INTERVAL '5' MINUTES, INTERVAL '10' MINUTES))

-- GROUP BY window_start, window_end;

--累计窗口

-- SELECT cast(window_start as string) AS window_start, cast(window_end as string) AS window_end , SUM(price) AS total_price

-- FROM TABLE(

-- CUMULATE(TABLE bid, DESCRIPTOR(ts), INTERVAL '5' MINUTES, INTERVAL '20' MINUTES))

-- GROUP BY window_start, window_end;

--会话窗口(不支持批处理)

SELECT window_start, window_end, item, SUM(price) AS total_price

FROM TABLE(

SESSION(TABLE bid PARTITION BY item, DESCRIPTOR(ts), INTERVAL '5' MINUTES))

GROUP BY item, window_start, window_end;

  1. 窗口聚合

CREATE TABLE bid (

`id` string,

bidtime TIMESTAMP(3),

price DECIMAL(10, 2),

item string,

supplier_id string,

ts as bidtime,

WATERMARK FOR ts AS ts - INTERVAL '5' SECOND,

--proc_time AS PROCTIME(),

PRIMARY KEY (`id`) NOT ENFORCED

)

WITH

(

'connector' = 'jdbc',

${36},

'table-name' = 'bid'

);

--分组集

-- SELECT cast(window_start as string) AS window_start, cast(window_end as string) AS window_end , supplier_id, SUM(price) AS total_price

-- FROM TABLE(

-- TUMBLE(TABLE bid, DESCRIPTOR(ts), INTERVAL '10' MINUTES))

-- GROUP BY window_start, window_end, GROUPING SETS ((supplier_id), ());

--ROLLUP

-- SELECT cast(window_start as string) AS window_start, cast(window_end as string) AS window_end , supplier_id, SUM(price) AS total_price

-- FROM TABLE(

-- TUMBLE(TABLE bid, DESCRIPTOR(ts), INTERVAL '10' MINUTES))

-- GROUP BY window_start, window_end, ROLLUP (supplier_id);

--立方体

SELECT cast(window_start as string) AS window_start, cast(window_end as string) AS window_end , supplier_id, item, SUM(price) AS total_price

FROM TABLE(

TUMBLE(TABLE bid, DESCRIPTOR(ts), INTERVAL '10' MINUTES))

GROUP BY window_start, window_end, CUBE (supplier_id, item);

  1. Over聚合

持续更新

相关推荐
柠檬味的Cat4 分钟前
GEO优化系统是什么?具体做什么,有什么用?
大数据·人工智能·aigc
zzqssliu4 分钟前
Taocarts接口限流实操:基于Redis实现API防刷与流量管控
数据库·redis·缓存
snpgroupcn9 分钟前
云转型过程中, 如何有效推进Rise with SAP项目实施
大数据·人工智能·云转型
Rocktech_ruixun15 分钟前
服务机器人硬件选型指南:RK3588/RK3568核心板适配多场景方案解析
大数据·人工智能·科技·ai·机器人
MemoriKu19 分钟前
Flutter 相册 APP 收尾优化实战:未分析任务横幅持久隐藏与标签回归测试补强
大数据·人工智能·flutter·elasticsearch·机器学习·搜索引擎·重构
阿标在干嘛22 分钟前
从“拍脑袋”到“数据驱动”:政策平台的A/B测试实践
大数据·人工智能·算法·ab测试
啦啦啦啦啦zzzz25 分钟前
redis的持久化操作和主从复制与集群的关系及其应用
数据库·redis
别叫我老干部36 分钟前
一键给整个库造测试数据:外键、约束一个都不能少
后端·mysql
IT策士40 分钟前
Redis 从入门到精通:分片之道 —— Redis Cluster
数据库·redis·缓存
宸津-代码粉碎机1 小时前
Spring AI企业级实战|智能记忆摘要+自动遗忘机制落地,彻底解决上下文爆炸与Token冗余
java·大数据·人工智能·后端·python·spring