DolphinDB SQL查询:从简单到复杂

目录

    • 摘要
    • 一、基础查询
      • 1.1 SELECT语句
      • 1.2 条件过滤
      • 1.3 排序与限制
    • 二、聚合查询
      • 2.1 基本聚合
      • 2.2 分组聚合
      • 2.3 分组集
    • 三、连接查询
      • 3.1 连接类型
      • 3.2 内连接
      • 3.3 左连接
      • 3.4 多表连接
    • 四、子查询
      • 4.1 标量子查询
      • 4.2 行子查询
      • 4.3 表子查询
    • 五、窗口函数
      • 5.1 排序函数
      • 5.2 聚合窗口函数
      • 5.3 偏移函数
    • 六、时间序列查询
      • 6.1 时间窗口
      • 6.2 时间对齐
      • 6.3 重采样
    • 七、复杂查询实战
      • 7.1 分组Top-N
      • 7.2 同比环比
      • 7.3 连续区间
    • 八、查询优化
      • 8.1 执行计划
      • 8.2 优化建议
      • 8.3 常见问题
    • 九、总结
    • 参考资料

摘要

本文系统介绍DolphinDB SQL查询语言。从基础SELECT语句到复杂的多表关联,从条件过滤到分组聚合,从子查询到窗口函数,逐步带领读者掌握DolphinDB SQL查询的核心技能。通过丰富的实战案例,帮助读者编写高效的数据查询语句。


一、基础查询

1.1 SELECT语句

python 复制代码
// 创建示例表
t = table(
    1..10 as id,
    `A`B`C`A`B`C`A`B`C`A as category,
    10 20 30 15 25 35 12 22 32 18 as value,
    2024.01.01 + 0..9 as date
)

// 查询所有列
select * from t

// 查询指定列
select id, category, value from t

// 列别名
select id as device_id, value as temperature from t

// 计算列
select id, value, value * 1.8 + 32 as fahrenheit from t

1.2 条件过滤

python 复制代码
// WHERE条件
select * from t where id > 5

// 多条件
select * from t where id > 3 and value < 30

// IN条件
select * from t where category in [`A, `B]

// BETWEEN条件
select * from t where value between 15 and 30

// LIKE模糊匹配
select * from t where category like "A%"

// NULL判断
select * from t where value is not NULL

1.3 排序与限制

python 复制代码
// Sorting
select * from t order by value desc
select * from t order by category, value desc

// Limit the number of returned rows
select top 5 * from t
select top 5 * from t order by value desc

// Pagination: DolphinDB's limit clause takes "offset, count"
// rather than a separate OFFSET keyword
select * from t limit 3, 5  // skip 3 rows, then return 5 rows (rows 4 to 8)

二、聚合查询

2.1 基本聚合

python 复制代码
// 聚合函数
select count(*) as cnt from t
select sum(value) as total from t
select avg(value) as average from t
select max(value) as max_val, min(value) as min_val from t

// 多聚合
select count(*) as cnt,
       sum(value) as total,
       avg(value) as average,
       std(value) as std_dev
from t

2.2 分组聚合

python 复制代码
// GROUP BY
select category, 
       count(*) as cnt,
       sum(value) as total,
       avg(value) as average
from t
group by category

// 多列分组
select category, 
       date,
       count(*) as cnt,
       avg(value) as avg_val
from t
group by category, date

// HAVING过滤
select category, avg(value) as avg_val
from t
group by category
having avg(value) > 20

2.3 分组集

python 复制代码
// ROLLUP
select category, date, sum(value) as total
from t
group by rollup(category, date)

// CUBE
select category, date, sum(value) as total
from t
group by cube(category, date)

// GROUPING SETS
select category, date, sum(value) as total
from t
group by grouping sets((category), (date), (category, date))

三、连接查询

3.1 连接类型

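| 连接类型 | 名称 | 返回结果 |
| --- | --- | --- |
| INNER JOIN | 内连接 | 交集(仅返回两表都匹配的行) |
| LEFT JOIN | 左连接 | 左表全部(右表无匹配时为NULL) |
| RIGHT JOIN | 右连接 | 右表全部(左表无匹配时为NULL) |
| FULL JOIN | 全连接 | 并集(两表的所有行) |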

3.2 内连接

python 复制代码
// Sample tables: t1 has ids 1..5, t2 only has ids 1..3
t1 = table(1..5 as id, `A`B`C`D`E as name)
t2 = table(1..3 as id, 100..102 as value)

// Inner join via the equi join function ej: only ids present in both
// tables (1, 2, 3) are returned. Note that lj is the LEFT join function
// and would also return ids 4 and 5 with NULL values.
select * from ej(t1, t2, `id)
// Equivalent standard SQL form
select t1.id, t1.name, t2.value 
from t1
inner join t2 on t1.id = t2.id

3.3 左连接

python 复制代码
// 左连接
select * from lj(t1, t2, `id)

/*
id name value
1  A    100
2  B    101
3  C    102
4  D    NULL
5  E    NULL
*/

3.4 多表连接

python 复制代码
// 多表连接
t1 = table(1..5 as id, `A`B`C`D`E as name)
t2 = table(1..5 as id, 100..104 as value)
t3 = table(1..5 as id, `X`Y`Z`X`Y as type)

select t1.id, t1.name, t2.value, t3.type
from t1
left join t2 on t1.id = t2.id
left join t3 on t1.id = t3.id

四、子查询

4.1 标量子查询

python 复制代码
// 标量子查询(返回单个值)
select * from t 
where value > (select avg(value) from t)

// 在SELECT中使用
select id, value, 
       (select avg(value) from t) as avg_value
from t

4.2 行子查询

python 复制代码
// 行子查询(返回多行)
select * from t
where category in (select distinct category from t where value > 25)

// EXISTS子查询
select * from t1
where exists (select * from t2 where t2.id = t1.id)

4.3 表子查询

python 复制代码
// 表子查询(返回表)
select category, avg_val
from (
    select category, avg(value) as avg_val
    from t
    group by category
)
where avg_val > 20

// WITH子句(CTE)
with 
stats as (
    select category, avg(value) as avg_val
    from t
    group by category
)
select * from stats where avg_val > 20

五、窗口函数

5.1 排序函数

python 复制代码
// ROW_NUMBER
select id, category, value,
       row_number() over (partition by category order by value desc) as rank
from t

// RANK(有并列)
select id, category, value,
       rank() over (order by value desc) as rank
from t

// DENSE_RANK(连续排名)
select id, category, value,
       dense_rank() over (order by value desc) as rank
from t

5.2 聚合窗口函数

python 复制代码
// 累积聚合
select id, value,
       sum(value) over (order by id) as cumsum,
       avg(value) over (order by id rows between 2 preceding and current row) as mavg
from t

// 分组窗口
select id, category, value,
       sum(value) over (partition by category order by id) as category_cumsum
from t

5.3 偏移函数

python 复制代码
// LAG/LEAD: value of the previous / next row when ordered by id
select id, value,
       lag(value, 1) over (order by id) as prev_value,
       lead(value, 1) over (order by id) as next_value
from t

// FIRST/LAST over the whole ordered set.
// With only "order by", the default window frame ends at the current row,
// so last(value) would simply return the current row's value. An explicit
// full frame is required to get the last value of the entire set.
select id, value,
       first(value) over (order by id) as first_val,
       last(value) over (order by id rows between unbounded preceding and unbounded following) as last_val
from t

六、时间序列查询

6.1 时间窗口

python 复制代码
// Build sample time-series data: 100 rows, one row per minute.
// 2024.01.01T00:00:00 is a DATETIME (second precision), so adding an integer
// adds SECONDS; the per-row step is therefore 60, not 60000 (which would be
// the step in milliseconds for a TIMESTAMP value).
t = table(
    1..100 as id,
    2024.01.01T00:00:00 + (0..99) * 60 as timestamp,
    rand(100.0, 100) as value
)

// Aggregate over 10-minute windows
select bar(timestamp, 10m) as time_window,
       avg(value) as avg_val,
       max(value) as max_val,
       count(*) as cnt
from t
group by bar(timestamp, 10m)

6.2 时间对齐

python 复制代码
// Filter a time range (between is inclusive on both ends)
select * from t 
where timestamp between 2024.01.01T00:00:00 and 2024.01.01T01:00:00

// Aggregate by hour. The DURATION unit for hours is the upper-case H;
// lower-case m means minutes and upper-case M means months.
select bar(timestamp, 1H) as hour,
       avg(value) as avg_val
from t
group by bar(timestamp, 1H)

6.3 重采样

python 复制代码
// 重采样(1分钟→5分钟)
select bar(timestamp, 5m) as time_5m,
       first(value) as open,
       max(value) as high,
       min(value) as low,
       last(value) as close
from t
group by bar(timestamp, 5m)

七、复杂查询实战

7.1 分组Top-N

python 复制代码
// 每个类别取前3条
select * from (
    select id, category, value,
           row_number() over (partition by category order by value desc) as rank
    from t
) where rank <= 3

7.2 同比环比

python 复制代码
// Period-over-period change (compared with the previous row by date).
// The lag is computed once in a subquery instead of being repeated three times.
// The rate uses the ratio operator `\`: in DolphinDB `/` applied to two
// integers performs integer division, which would truncate the rate.
select date, value, prev_value,
       (value - prev_value) \ prev_value as mom_rate
from (
    select date, value,
           lag(value, 1) over (order by date) as prev_value
    from t
)

// Year-over-year change.
// NOTE: lag(value, 12) is only "same period last year" when t holds exactly
// one row per month with no gaps; on daily data it is just 12 rows back.
select date, value, prev_year_value,
       (value - prev_year_value) \ prev_year_value as yoy_rate
from (
    select date, value,
           lag(value, 12) over (order by date) as prev_year_value
    from t
)

7.3 连续区间

python 复制代码
// Find runs of consecutive ids whose value exceeds 20.
// After filtering, id - row_number() is constant within a run of consecutive
// ids, so it serves as the run identifier (grp).
// "select *" cannot be combined with group by, so each run is summarised
// with explicit aggregates instead.
select grp,
       min(id) as start_id,
       max(id) as end_id,
       count(*) as run_length
from (
    select id, value,
           id - row_number() over (order by id) as grp
    from t
    where value > 20
) 
group by grp
having count(*) >= 3  // keep runs of at least 3 consecutive rows

八、查询优化

8.1 执行计划

python 复制代码
// Show the execution plan. DolphinDB has no standalone EXPLAIN statement;
// the plan is requested with the [HINT_EXPLAIN] hint placed right after select.
select [HINT_EXPLAIN] * from t where id > 5

// Measure the elapsed time of a query
timer select count(*) from t

8.2 优化建议

| 优化项 | 说明 |
| --- | --- |
| 分区裁剪 | 在分区列上过滤 |
| 索引使用 | 在索引列上查询 |
| 减少扫描 | 只查询需要的列 |
| 避免全表 | 使用WHERE条件 |

8.3 常见问题

python 复制代码
// Avoid SELECT *
select id, value from t  // good: reads only the needed columns
select * from t          // avoid: reads every column

// Filter on the partitioning column
select * from t 
where date between 2024.01.01 and 2024.01.31  // good

// Avoid wrapping the filtered column in a function.
// Both queries below select all rows of 2024; the first one filters with a
// plain range on the column, the second one applies a function to it.
select * from t where date between 2024.01.01 and 2024.12.31  // good
select * from t where year(date) = 2024                       // avoid

九、总结

本文系统介绍了DolphinDB SQL查询:

  1. 基础查询:SELECT、WHERE、ORDER BY
  2. 聚合查询:GROUP BY、HAVING、分组集
  3. 连接查询:内连接、左连接、多表连接
  4. 子查询:标量、行、表子查询
  5. 窗口函数:排序、聚合、偏移
  6. 时间序列:时间窗口、时间对齐、重采样
  7. 复杂查询实战:分组Top-N、同比环比、连续区间
  8. 查询优化:执行计划、优化建议

思考题

  1. 如何选择合适的连接类型?
  2. 窗口函数和GROUP BY有什么区别?
  3. 如何优化大数据量查询?

参考资料

相关推荐
2301_808414381 天前
MySQL中的函数
数据库·mysql
Mahir081 天前
MySQL 数据一致性的基石:三大日志( redo log/undo log/binlog)与两阶段提交(Prepare 阶段和Commit 阶段)深度解密
数据库·后端·mysql·面试
x***r1511 天前
dbeaver-ce-24.1.3-x86_64-setup安装步骤详解(附DBeaver数据库管理与SQL编写教程)
数据库·sql
一只鹿鹿鹿1 天前
数据库运维与管理规范(WORD)
运维·数据库
todoitbo1 天前
WHERE 子句中的函数执行顺序与副作用风险分析
数据库·时序数据库·函数
jiayong231 天前
MySQL 8.0 Root 用户远程登录配置完整指南
数据库·mysql
数智化管理手记1 天前
设备总停机?找准根源+TPM核心逻辑,筑牢零故障基础
数据库·人工智能·低代码·制造
zhangshuang-peta1 天前
MCP + OpenClaw:执行框架如何被“约束成系统”
数据库·人工智能·ai·ai agent·mcp·peta
java1234_小锋1 天前
说一下Spring的事务传播行为?
java·数据库·spring
苏三说技术1 天前
美团二面:高并发下如何保证接口幂等性?
java·数据库