DolphinDB SQL查询:从简单到复杂

目录

    • 摘要
    • 一、基础查询
      • [1.1 SELECT语句](#1.1 SELECT语句)
      • [1.2 条件过滤](#1.2 条件过滤)
      • [1.3 排序与限制](#1.3 排序与限制)
    • 二、聚合查询
      • [2.1 基本聚合](#2.1 基本聚合)
      • [2.2 分组聚合](#2.2 分组聚合)
      • [2.3 分组集](#2.3 分组集)
    • 三、连接查询
      • [3.1 连接类型](#3.1 连接类型)
      • [3.2 内连接](#3.2 内连接)
      • [3.3 左连接](#3.3 左连接)
      • [3.4 多表连接](#3.4 多表连接)
    • 四、子查询
      • [4.1 标量子查询](#4.1 标量子查询)
      • [4.2 行子查询](#4.2 行子查询)
      • [4.3 表子查询](#4.3 表子查询)
    • 五、窗口函数
      • [5.1 排序函数](#5.1 排序函数)
      • [5.2 聚合窗口函数](#5.2 聚合窗口函数)
      • [5.3 偏移函数](#5.3 偏移函数)
    • 六、时间序列查询
      • [6.1 时间窗口](#6.1 时间窗口)
      • [6.2 时间对齐](#6.2 时间对齐)
      • [6.3 重采样](#6.3 重采样)
    • 七、复杂查询实战
      • [7.1 分组Top-N](#7.1 分组Top-N)
      • [7.2 同比环比](#7.2 同比环比)
      • [7.3 连续区间](#7.3 连续区间)
    • 八、查询优化
      • [8.1 执行计划](#8.1 执行计划)
      • [8.2 优化建议](#8.2 优化建议)
      • [8.3 常见问题](#8.3 常见问题)
    • 九、总结
    • 参考资料

摘要

本文系统介绍DolphinDB SQL查询语言。从基础SELECT语句到复杂的多表关联,从条件过滤到分组聚合,从子查询到窗口函数,逐步带领读者掌握DolphinDB SQL查询的核心技能。通过丰富的实战案例,帮助读者编写高效的数据查询语句。


一、基础查询

1.1 SELECT语句

python 复制代码
// Build the sample table used by the basic-query examples: 10 rows with an
// integer id, a category label cycling through A/B/C, an integer value,
// and ten consecutive dates starting at 2024.01.01.
t = table(
    1..10 as id,
    `A`B`C`A`B`C`A`B`C`A as category,
    10 20 30 15 25 35 12 22 32 18 as value,
    2024.01.01 + 0..9 as date
)

// Select every column
select * from t

// Select specific columns
select id, category, value from t

// Rename columns in the result with aliases
select id as device_id, value as temperature from t

// Computed column: value * 1.8 + 32 is the Celsius-to-Fahrenheit formula
select id, value, value * 1.8 + 32 as fahrenheit from t

1.2 条件过滤

python 复制代码
// Filter rows with a WHERE predicate
select * from t where id > 5

// Combine predicates with AND
select * from t where id > 3 and value < 30

// Membership test against a list of values
select * from t where category in [`A, `B]

// Range test; BETWEEN includes both endpoints
select * from t where value between 15 and 30

// Pattern match; % matches any sequence of characters
select * from t where category like "A%"

// Keep only rows whose value is not NULL
select * from t where value is not NULL

1.3 排序与限制

python 复制代码
// Sort by one column, descending
select * from t order by value desc
// Sort by category (ascending by default), then by value descending within each category
select * from t order by category, value desc

// Limit the number of rows returned
select top 5 * from t
// Top 5 rows after sorting by value descending
select top 5 * from t order by value desc

// Pagination: skip 3 rows, then return the next 5
// NOTE(review): confirm the `limit ... offset ...` spelling is accepted by the target DolphinDB version.
select * from t limit 5 offset 3  // take 5 rows starting from the 4th row

二、聚合查询

2.1 基本聚合

python 复制代码
// Single aggregates computed over the whole table
select count(*) as cnt from t
select sum(value) as total from t
select avg(value) as average from t
select max(value) as max_val, min(value) as min_val from t

// Several aggregates in one query: row count, sum, mean and standard deviation
select count(*) as cnt,
       sum(value) as total,
       avg(value) as average,
       std(value) as std_dev
from t

2.2 分组聚合

python 复制代码
// GROUP BY: one output row per category with its count, sum and mean
select category, 
       count(*) as cnt,
       sum(value) as total,
       avg(value) as average
from t
group by category

// Group by several columns: one output row per (category, date) pair
select category, 
       date,
       count(*) as cnt,
       avg(value) as avg_val
from t
group by category, date

// HAVING filters groups after aggregation: keep categories whose mean exceeds 20
select category, avg(value) as avg_val
from t
group by category
having avg(value) > 20

2.3 分组集

python 复制代码
// NOTE(review): confirm rollup / cube / grouping sets are supported by the target DolphinDB version.

// ROLLUP: totals per (category, date), subtotals per category, and a grand total
select category, date, sum(value) as total
from t
group by rollup(category, date)

// CUBE: totals for every combination of the listed columns
select category, date, sum(value) as total
from t
group by cube(category, date)

// GROUPING SETS: totals for exactly the listed groupings
select category, date, sum(value) as total
from t
group by grouping sets((category), (date), (category, date))

三、连接查询

3.1 连接类型

连接类型概览:

  • INNER JOIN(内连接):只返回两表都能匹配上的行(交集)
  • LEFT JOIN(左连接):返回左表全部行,右表无匹配时对应列为空
  • RIGHT JOIN(右连接):返回右表全部行,左表无匹配时对应列为空
  • FULL JOIN(全连接):返回两表的全部行(并集)

3.2 内连接

python 复制代码
// Sample tables: t1 holds ids 1..5 with names, t2 holds values for ids 1..3 only
t1 = table(1..5 as id, `A`B`C`D`E as name)
t2 = table(1..3 as id, 100..102 as value)

// Inner join with the built-in equi-join function ej: only ids present in
// both tables (1, 2, 3) are returned. lj is the left join and would also
// keep ids 4 and 5 with empty values, so it must not be used here.
select * from ej(t1, t2, `id)
// Equivalent ANSI join syntax
select t1.id, t1.name, t2.value
from t1
inner join t2 on t1.id = t2.id

3.3 左连接

python 复制代码
// Left join with the built-in lj function: every row of t1 is kept, and
// value is NULL where t2 has no row with the same id.
select * from lj(t1, t2, `id)

/*
Expected result (t2 only covers ids 1..3):
id name value
1  A    100
2  B    101
3  C    102
4  D    NULL
5  E    NULL
*/

3.4 多表连接

python 复制代码
// Three tables that share the id key (ids 1..5 in each)
t1 = table(1..5 as id, `A`B`C`D`E as name)
t2 = table(1..5 as id, 100..104 as value)
t3 = table(1..5 as id, `X`Y`Z`X`Y as type)

// Chain two left joins from t1: each join matches on t1.id, so every t1 row is kept
select t1.id, t1.name, t2.value, t3.type
from t1
left join t2 on t1.id = t2.id
left join t3 on t1.id = t3.id

四、子查询

4.1 标量子查询

python 复制代码
// Scalar subquery (returns a single value): keep rows above the overall average
select * from t 
where value > (select avg(value) from t)

// Scalar subquery in the SELECT list: attach the overall average to every row
select id, value, 
       (select avg(value) from t) as avg_value
from t

4.2 行子查询

python 复制代码
// Subquery returning multiple rows of a single column, used with IN:
// keep rows whose category has at least one value above 25
select * from t
where category in (select distinct category from t where value > 25)

// Correlated EXISTS subquery: keep t1 rows that have a matching id in t2
select * from t1
where exists (select * from t2 where t2.id = t1.id)

4.3 表子查询

python 复制代码
// Table subquery (returns a table): aggregate per category in the inner
// query, then filter on the computed average in the outer query
select category, avg_val
from (
    select category, avg(value) as avg_val
    from t
    group by category
)
where avg_val > 20

// The same query written with a WITH clause (common table expression);
// stats names the per-category averages
with 
stats as (
    select category, avg(value) as avg_val
    from t
    group by category
)
select * from stats where avg_val > 20

五、窗口函数

5.1 排序函数

python 复制代码
// ROW_NUMBER: unique sequence number within each category, largest value first
select id, category, value,
       row_number() over (partition by category order by value desc) as rank
from t

// RANK: tied values share a rank and the ranks after a tie are skipped
select id, category, value,
       rank() over (order by value desc) as rank
from t

// DENSE_RANK: tied values share a rank and no ranks are skipped
select id, category, value,
       dense_rank() over (order by value desc) as rank
from t

5.2 聚合窗口函数

python 复制代码
// Running total in id order, plus a moving average over a 3-row frame
// (the current row and the two rows before it)
select id, value,
       sum(value) over (order by id) as cumsum,
       avg(value) over (order by id rows between 2 preceding and current row) as mavg
from t

// Partitioned window: the running total restarts for each category
select id, category, value,
       sum(value) over (partition by category order by id) as category_cumsum
from t

5.3 偏移函数

python 复制代码
// LAG / LEAD: value of the row one position before / after the current row in id order
select id, value,
       lag(value, 1) over (order by id) as prev_value,
       lead(value, 1) over (order by id) as next_value
from t

// FIRST / LAST evaluated over a window ordered by id
select id, value,
       first(value) over (order by id) as first_val,
       last(value) over (order by id) as last_val
from t

六、时间序列查询

6.1 时间窗口

python 复制代码
// Build a time series: 100 rows, one per minute starting at 2024.01.01T00:00:00.
// The literal is a second-precision DATETIME, so integer offsets are in
// seconds and a one-minute step is 60 (60000 would be a 1000-minute step).
// The sequence is parenthesized to make the intended grouping explicit.
t = table(
    1..100 as id,
    2024.01.01T00:00:00 + (0..99) * 60 as timestamp,
    rand(100.0, 100) as value   // 100 random doubles below 100.0
)

// Aggregate per 10-minute window: bar() maps each timestamp to the start of its window
select bar(timestamp, 10m) as time_window,
       avg(value) as avg_val,
       max(value) as max_val,
       count(*) as cnt
from t
group by bar(timestamp, 10m)

6.2 时间对齐

python 复制代码
// Rows inside a closed time range (both endpoints are included)
select * from t
where timestamp between 2024.01.01T00:00:00 and 2024.01.01T01:00:00

// Hourly aggregation. Duration units are case-sensitive: H is hour, m is minute.
select bar(timestamp, 1H) as hour,
       avg(value) as avg_val
from t
group by bar(timestamp, 1H)

6.3 重采样

python 复制代码
// Resample to 5-minute bars (the source rows are intended to be 1-minute data):
// first / max / min / last of value within each bar give open / high / low / close
select bar(timestamp, 5m) as time_5m,
       first(value) as open,
       max(value) as high,
       min(value) as low,
       last(value) as close
from t
group by bar(timestamp, 5m)

七、复杂查询实战

7.1 分组Top-N

python 复制代码
// Top 3 rows per category by value.
// The inner query numbers the rows of each category from the largest value
// down; the outer query keeps numbers 1 through 3.
// Note: t must have a category column, i.e. the table built in section 1.1,
// not the id/timestamp/value table that section 6.1 assigns to t.
select * from (
    select id, category, value,
           row_number() over (partition by category order by value desc) as rank
    from t
) where rank <= 3

7.2 同比环比

python 复制代码
// Month-over-month and year-over-year growth.
// Both calculations compare a row with an earlier row by position, so the
// input must hold exactly one row per month with no gaps: lag(value, 1) is
// then the previous month and lag(value, 12) the same month one year earlier.
// A dedicated monthly table is used because daily data would make
// lag(value, 12) mean "12 days ago".
monthly = table(
    2023.01M + 0..23 as month,          // 24 consecutive months
    100.0 + (0..23) * 5.0 as value
)

// Month-over-month: compute the lagged value once in the inner query, then
// derive the rate. The \ operator always performs floating-point division,
// whereas / truncates when both operands are integers. The first row has no
// predecessor, so its prev_value and mom_rate are NULL.
select month, value, prev_value,
       (value - prev_value) \ prev_value as mom_rate
from (
    select month, value,
           lag(value, 1) over (order by month) as prev_value
    from monthly
)

// Year-over-year: same shape with a 12-row lag; the first 12 rows are NULL
select month, value, prev_year_value,
       (value - prev_year_value) \ prev_year_value as yoy_rate
from (
    select month, value,
           lag(value, 12) over (order by month) as prev_year_value
    from monthly
)

7.3 连续区间

python 复制代码
// Find runs of consecutive ids whose value exceeds 20 (gaps-and-islands).
// After the WHERE filter, row_number() numbers the surviving rows 1, 2, 3, ...;
// for ids that are consecutive, id - row_number() stays constant, so that
// difference (grp) identifies each run.
// A grouped query must list explicit aggregates instead of select *, so
// each run is reported by its first id, last id and length.
select grp,
       min(id) as start_id,
       max(id) as end_id,
       count(*) as run_length
from (
    select id, value,
           id - row_number() over (order by id) as grp
    from t
    where value > 20
)
group by grp
having count(*) >= 3  // keep runs of at least 3 rows

八、查询优化

8.1 执行计划

python 复制代码
// Show the execution plan. DolphinDB exposes it through the [HINT_EXPLAIN]
// hint placed right after select; there is no standalone explain statement.
// The query then returns a description of its execution instead of the rows.
select [HINT_EXPLAIN] * from t where id > 5

// Measure the elapsed time of a query
timer select count(*) from t

8.2 优化建议

优化项 说明
分区裁剪 在分区列上过滤
索引使用 在索引列上查询
减少扫描 只查询需要的列
避免全表 使用WHERE条件

8.3 常见问题

python 复制代码
// Project only the columns you need
select id, value from t  // preferred
select * from t          // avoid: reads every column

// Filter on the partitioning column (assuming the table is partitioned by
// date) so that only the matching partitions need to be read
select * from t
where date between 2024.01.01 and 2024.01.31  // preferred

// Do not wrap the filtered column in a function. Both statements below
// select the rows of year 2024; the first states it as a plain range on the
// column, the second hides the column inside year().
select * from t where date between 2024.01.01 and 2024.12.31  // preferred
select * from t where year(date) = 2024                       // avoid

九、总结

本文系统介绍了DolphinDB SQL查询:

  1. 基础查询:SELECT、WHERE、ORDER BY
  2. 聚合查询:GROUP BY、HAVING、分组集
  3. 连接查询:内连接、左连接、多表连接
  4. 子查询:标量、行、表子查询
  5. 窗口函数:排序、聚合、偏移
  6. 时间序列:时间窗口、重采样
  7. 查询优化:执行计划、优化建议

思考题

  1. 如何选择合适的连接类型?
  2. 窗口函数和GROUP BY有什么区别?
  3. 如何优化大数据量查询?

参考资料

相关推荐
zzzzzz31015 小时前
9K Star 炸裂开源!这个 C 语言写的代码知识图谱,把 Linux 内核索引压缩到了 3 分钟
linux·服务器·sql
倔强的石头_3 天前
《Kingbase护城河》——数据库存储空间全景探测与精细化瘦身实战
数据库
云技纵横3 天前
唯一索引 INSERT 死锁实战:5 秒复现交叉插入的 S 锁循环等待
sql·mysql
沉默王二3 天前
面试官:RAG 不用向量数据库,用 MySQL 硬扛?我:100 万向量不是很轻松?
mysql·面试·ai编程
冬奇Lab3 天前
每日一个开源项目(第134篇):Zvec - 阿里开源的嵌入式向量数据库,向量搜索界的 SQLite
数据库·人工智能·llm
小猿姐3 天前
MySQL Top 10 热点问题 AI 运维实战:从内核诊断到云原生运维
mysql·云原生·aiops
ClouGence4 天前
Oracle CDC 架构优化:从主库直连到 DataGuard 备库同步
数据库·后端·oracle
云技纵横4 天前
Gap Lock 死锁实战:5 秒在本地复现 MySQL 间隙锁死锁
后端·mysql
无响应de神4 天前
三、用户与权限管理
数据库·mysql