一个在postgresql中运行很快,但是在duckdb中运行很慢的SQL

这是NineData-玖章算术-2025数据库编程大赛-一条SQL解数独问题亚军程宁先生的SQL代码。

对只有一行的数据表,执行递归CTE查询,迭代 654次,在postgresql中只要不到1秒,在duckdb中多线程要100多毫秒,设为单线程,也要7秒。

sql 复制代码
WITH RECURSIVE
-- 生成1-9的有效数字(数独允许的数字范围)
d(d) AS MATERIALIZED(SELECT d from generate_series(1, 9)t(d)),
-- 生成81个单元格的位置信息:pos(1-81)全局位置,r(1-9)行,c(1-9)列,bx(1-9)宫
pi(pos, r, c, bx) AS MATERIALIZED(
    SELECT 
        pos,
        ((pos - 1) / 9) + 1 AS r,
        ((pos - 1) % 9) + 1 AS c,
        ((pos - 1) / 9) / 3 * 3 + ((pos - 1) % 9) / 3 + 1 AS bx
    FROM generate_series(1, 81) AS t(pos)
),
-- 预处理数独数据:清洗空白字符、替换?为0,转为整数数组
cp(id, pz, bs) AS (
    SELECT 
        id,
        puzzle,
        regexp_split_to_array(
            regexp_replace(regexp_replace(puzzle, '[\r\n\s]', '', 'g'), '\?', '0', 'g'), 
            ''
        )::integer[] AS bs
    FROM (SELECT 3 AS id, E'800000000003600000070090200050007000000045700000100030001000068008500010090000400' AS puzzle)sudoku9_9
),
-- 递归求解核心:初始填充→确定填充/回溯/猜测填充循环
s1(id,flag, bs,bse,i ) AS (
    -- 递归初始态:加载预处理后的初始盘面
    SELECT id,'初始填充', bs,ARRAY[]::integer[][], 0 AS i  FROM cp
    UNION ALL
    -- 递归体:计算下一步填充逻辑
    SELECT s1.id,n.flag, n.bs,n.bse, s1.i + 1 AS i
    FROM s1
    CROSS JOIN LATERAL (
        WITH 
        -- 关联位置信息,展开当前盘面的单元格详情
        eb(pos, r, c, bx, v) AS (SELECT pi.pos, pi.r, pi.c, pi.bx, s1.bs[pi.pos] FROM pi),
        -- 计算空白单元格的合法候选数(行/列/宫无重复)
        cd(pos, r, c, bx, d) AS (

            SELECT e.pos, e.r, e.c, e.bx, d.d
            FROM eb e 
            CROSS JOIN d
            LEFT JOIN eb e2 ON e2.r = e.r AND e2.v = d.d
            LEFT JOIN eb e3 ON e3.c = e.c AND e3.v = d.d
            LEFT JOIN eb e4 ON e4.bx = e.bx AND e4.v = d.d
            WHERE e.v = 0
              AND e2.r IS NULL
              AND e3.r IS NULL 
              AND e4.r IS NULL 
        ),
        -- 筛选唯一可填的单元格(位置/行/列/宫维度)
        af(pos,r, c, bx, d) AS (
            SELECT pos,r, c, bx, MIN(d) FROM cd GROUP BY pos,r, c, bx HAVING COUNT(1) = 1
            UNION
            SELECT MIN(pos),r,min(c),min(bx), d FROM cd GROUP BY r, d HAVING COUNT(1) = 1
            UNION
            SELECT MIN(pos),min(r),c,min(bx), d FROM cd GROUP BY c, d HAVING COUNT(1) = 1
            UNION
            SELECT MIN(pos),min(r),min(c),bx, d FROM cd GROUP BY bx, d HAVING COUNT(1) = 1
        ),
        -- 检查填充是否冲突(无效解)
        error_check(is_invalid) AS (
            SELECT EXISTS (
                SELECT 1 FROM af GROUP BY r, d HAVING COUNT(*) > 1
                UNION ALL SELECT 1 FROM af GROUP BY c, d HAVING COUNT(*) > 1
                UNION ALL SELECT 1 FROM af GROUP BY bx, d HAVING COUNT(*) > 1
                UNION ALL SELECT 1 FROM af GROUP BY pos,d HAVING COUNT(*) > 1
            ) or array_length(bs,1) > 81 or not exists (select 1 from cd) AS is_invalid
        ),
        -- 选择候选数最少的位置作为猜测位置
        bg (pos) as (SELECT pos FROM (SELECT pos FROM cd GROUP BY pos ORDER BY COUNT(1), pos asc LIMIT 1) AS p), 
        -- 获取猜测位置的所有候选数
        gv (d) as (SELECT cd.d FROM cd,bg WHERE cd.pos = bg.pos ),
        -- 生成第一次猜测的盘面(取最小候选数)
        gas1 (bs) as (
        	SELECT s1.bs[1 : (bg.pos - 1)] || ARRAY[gv.d] || s1.bs[(bg.pos + 1) : 81] AS bs 
        	from bg,gv order by d limit 1 ),
        -- 剩余猜测盘面存入回溯栈
        gas2(bse) as (select ARRAY_AGG(bs) from (	
        	SELECT s1.bs[1 : (bg.pos - 1)] || ARRAY[gv.d] || s1.bs[(bg.pos + 1) : 81] AS bs
        	from bg,gv order by d offset 1) tmpbs),
        -- 从回溯栈取出上一个备选盘面
        cbse(bs) as (SELECT array_agg(x) from (select unnest(s1.bse[1:1]) as x )),
        -- 应用确定填充生成新盘面
        adf(bs, hf) AS (
            SELECT 
                (SELECT array_agg(coalesce(af.d, s1.bs[pi.pos]) order by pi.pos) FROM pi 
                LEFT JOIN af ON pi.pos = af.pos) AS bs,
                EXISTS (SELECT 1 FROM af) AS hf
        )
        -- 分支1:执行确定填充(有可填项且无冲突)
        SELECT '确定填充' flag,bs, s1.bse bse
        FROM adf,error_check WHERE hf AND NOT is_invalid
        UNION ALL
        -- 分支2:执行猜测填充(无可确定项且无冲突)
        SELECT '猜测填充',gas1.bs, gas2.bse||s1.bse
        FROM adf a,error_check,gas1,gas2
		WHERE NOT a.hf AND NOT is_invalid
		UNION ALL
        -- 分支3:执行回溯(填充冲突时)
        select '回溯',cbse.bs,s1.bse[2:array_length(s1.bse, 1)]
        from error_check,cbse where is_invalid 
    ) n
    WHERE s1.i < 10000 AND s1.bs @> ARRAY[0]
)
-- 输出最终解:将数组转为9x9格式的数独字符串
SELECT s.id, cp.pz, array_to_string(
    array(SELECT CASE WHEN pos % 9 = 1 AND pos > 1 THEN E'\n' ELSE '' END || value::text 
          FROM unnest(s.bs) WITH ORDINALITY AS elem(value, pos)),
    ''
) AS result
FROM s1 s 
JOIN cp ON s.id = cp.id
WHERE NOT s.bs @> ARRAY[0]
;

改写为duckdb版本

sql 复制代码
WITH RECURSIVE
-- 生成1-9的有效数字(数独允许的数字范围)
d(d) AS MATERIALIZED(SELECT d from generate_series(1, 9)t(d)),
-- 生成81个单元格的位置信息:pos(1-81)全局位置,r(1-9)行,c(1-9)列,bx(1-9)宫
pi(pos, r, c1, bx) AS MATERIALIZED(
    SELECT 
        pos,
        ((pos - 1) // 9) + 1 AS r,
        ((pos - 1) % 9) + 1 AS c,
        ((pos - 1) // 9) // 3 * 3 + ((pos - 1) % 9) // 3 + 1 AS bx
    FROM generate_series(1, 81) AS t(pos)
),
-- 预处理数独数据:清洗空白字符、替换?为0,转为整数数组
cp(id, pz, bs) AS (
    SELECT 
        id,
        puzzle,
        regexp_split_to_array(
            regexp_replace(regexp_replace(puzzle, '[\r\n\s]', '', 'g'), '\?', '0', 'g'), 
            ''
        )::integer[] AS bs
FROM (SELECT 3 AS id, E'800000000003600000070090200050007000000045700000100030001000068008500010090000400' AS puzzle)sudoku9_9
),
-- 递归求解核心:初始填充→确定填充/回溯/猜测填充循环
s1(id,flag, bs,bse,i ) AS MATERIALIZED(
    -- 递归初始态:加载预处理后的初始盘面
    SELECT id,'初始填充', bs,ARRAY[]::integer[][], 0 AS i  FROM cp
    UNION ALL
    -- 递归体:计算下一步填充逻辑
    SELECT s1.id,n.flag, n.bs,n.bse, s1.i + 1 AS i
    FROM s1
    CROSS JOIN LATERAL (
        WITH 
        -- 关联位置信息,展开当前盘面的单元格详情
        eb(pos, r, c, bx, v) AS (SELECT pi.pos, pi.r, pi.c1, pi.bx, s1.bs[pi.pos] FROM pi),
        -- 计算空白单元格的合法候选数(行/列/宫无重复)
        cd(pos, r, c, bx, d) AS (

            SELECT e.pos, e.r, e.c, e.bx, d.d
            FROM eb e 
            CROSS JOIN d
            LEFT JOIN eb e2 ON e2.r = e.r AND e2.v = d.d
            LEFT JOIN eb e3 ON e3.c = e.c AND e3.v = d.d
            LEFT JOIN eb e4 ON e4.bx = e.bx AND e4.v = d.d
            WHERE e.v = 0
              AND e2.r IS NULL
              AND e3.r IS NULL 
              AND e4.r IS NULL 

        ),

        -- 筛选唯一可填的单元格(位置/行/列/宫维度)
        af(pos,r, c, bx, d) AS (
            SELECT pos,r, c, bx, MIN(d) FROM cd GROUP BY pos,r, c, bx HAVING COUNT(1) = 1
            UNION
            SELECT MIN(pos),r,min(c),min(bx), d FROM cd GROUP BY r, d HAVING COUNT(1) = 1
            UNION
            SELECT MIN(pos),min(r),c,min(bx), d FROM cd GROUP BY c, d HAVING COUNT(1) = 1
            UNION
            SELECT MIN(pos),min(r),min(c),bx, d FROM cd GROUP BY bx, d HAVING COUNT(1) = 1
        ),

       -- 检查填充是否冲突(无效解)
        error_check(is_invalid) AS (
            SELECT EXISTS (
                SELECT 1 FROM af GROUP BY r, d HAVING COUNT(*) > 1
                UNION ALL SELECT 1 FROM af GROUP BY c, d HAVING COUNT(*) > 1
                UNION ALL SELECT 1 FROM af GROUP BY bx, d HAVING COUNT(*) > 1
                UNION ALL SELECT 1 FROM af GROUP BY pos,d HAVING COUNT(*) > 1
            ) or array_length(bs,1) > 81 or not exists (select 1 from cd) AS is_invalid
        ),

        -- 选择候选数最少的位置作为猜测位置 -- pos
        bg (pos) as (SELECT pos FROM (SELECT pos FROM cd GROUP BY pos ORDER BY COUNT(1),pos asc LIMIT 1) AS p), 
        -- 获取猜测位置的所有候选数
        gv (d) as (SELECT cd.d FROM cd,bg WHERE cd.pos = bg.pos ),
        -- 生成第一次猜测的盘面(取最小候选数)
        gas1 (bs) as (
        	SELECT s1.bs[1 : (bg.pos - 1)] || ARRAY[gv.d] || s1.bs[(bg.pos + 1) : 81] AS bs 
        	from bg,gv order by d limit 1 ),
        -- 剩余猜测盘面存入回溯栈

        gas2(bse) as (select ARRAY_AGG(bs) from (	
        	SELECT s1.bs[1 : (bg.pos - 1)] || ARRAY[gv.d] || s1.bs[(bg.pos + 1) : 81] AS bs
        	from bg,gv order by d offset 1) tmpbs),
 
        -- 从回溯栈取出上一个备选盘面
/*        cbse(bs) as --(SELECT array_agg(x) from (select unnest(s1.bse[1:1]) as x )),
        (SELECT s1.bse[1] as x ),
*/ 
       -- 应用确定填充生成新盘面
        adf(bs, hf) AS (
            SELECT 
                (SELECT array_agg(coalesce(af.d, s1.bs[pi.pos]) order by pi.pos) FROM pi 
                LEFT JOIN af ON pi.pos = af.pos) AS bs,
                EXISTS (SELECT 1 FROM af) AS hf
        )
        -- 分支1:执行确定填充(有可填项且无冲突)
        SELECT '确定填充' flag,bs, s1.bse bse
        FROM adf ,error_check WHERE hf AND NOT is_invalid
        UNION ALL
        -- 分支2:执行猜测填充(无可确定项且无冲突)
        SELECT '猜测填充',gas1.bs, gas2.bse||s1.bse
        FROM adf a,error_check,gas1,gas2
		WHERE NOT a.hf AND NOT is_invalid
 	 	UNION ALL
      -- 分支3:执行回溯(填充冲突时)
        select '回溯',s1.bse[1],s1.bse[2:]
        from error_check where is_invalid 

    ) n
    WHERE s1.i < 10000 AND s1.bs @> ARRAY[0]
)
-- 输出最终解:将数组转为9x9格式的数独字符串
SELECT s.id, cp.pz, array_to_string(
    array(SELECT CASE WHEN pos % 9 = 1 AND pos > 1 THEN E'\n' ELSE '' END || value::text 
          FROM unnest(s.bs) WITH ORDINALITY AS elem(value, pos)),
    ''
) AS result
FROM s1 s 
JOIN cp ON s.id = cp.id
WHERE NOT s.bs @> ARRAY[0]
;

输出如下

复制代码
postgres=# \i 1230/0112en.txt
 id |                                        pz                                         |  result
----+-----------------------------------------------------------------------------------+-----------
  3 | 800000000003600000070090200050007000000045700000100030001000068008500010090000400 | 812753649+
    |                                                                                   | 943682175+
    |                                                                                   | 675491283+
    |                                                                                   | 154237896+
    |                                                                                   | 369845721+
    |                                                                                   | 287169534+
    |                                                                                   | 521974368+
    |                                                                                   | 438526917+
    |                                                                                   | 796318452
(1 row)

Time: 135.542 ms
D .read 1230/0112ducken.txt
┌───────┬───────────────────────────────────────────────────────┬──────────────────────────────────────────────────────┐
│  id   │                          pz                           │                        result                        │
│ int32 │                        varchar                        │                       varchar                        │
├───────┼───────────────────────────────────────────────────────┼──────────────────────────────────────────────────────┤
│     3 │ 80000000000360000007009020005000700000004570000010003 │ 812753649\n943682175\n675491283\n154237896\n36984572 │
│       │ 0001000068008500010090000400                          │ 1\n287169534\n521974368\n438526917\n796318452        │
└───────┴───────────────────────────────────────────────────────┴──────────────────────────────────────────────────────┘
Run Time (s): real 11.169 user 24.808297 sys 10.038441
D set threads=1;
Run Time (s): real 0.007 user 0.011898 sys 0.000619
D .read 1230/0112ducken.txt
┌───────┬───────────────────────────────────────────────────────┬──────────────────────────────────────────────────────┐
│  id   │                          pz                           │                        result                        │
│ int32 │                        varchar                        │                       varchar                        │
├───────┼───────────────────────────────────────────────────────┼──────────────────────────────────────────────────────┤
│     3 │ 80000000000360000007009020005000700000004570000010003 │ 812753649\n943682175\n675491283\n154237896\n36984572 │
│       │ 0001000068008500010090000400                          │ 1\n287169534\n521974368\n438526917\n796318452        │
└───────┴───────────────────────────────────────────────────────┴──────────────────────────────────────────────────────┘
Run Time (s): real 7.429 user 6.501462 sys 0.397789
相关推荐
爱可生开源社区1 天前
2026 年,优秀的 DBA 需要具备哪些素质?
数据库·人工智能·dba
随逸1772 天前
《从零搭建NestJS项目》
数据库·typescript
加号32 天前
windows系统下mysql多源数据库同步部署
数据库·windows·mysql
シ風箏2 天前
MySQL【部署 04】Docker部署 MySQL8.0.32 版本(网盘镜像及启动命令分享)
数据库·mysql·docker
李慕婉学姐2 天前
Springboot智慧社区系统设计与开发6n99s526(程序+源码+数据库+调试部署+开发环境)带论文文档1万字以上,文末可获取,系统界面在最后面。
数据库·spring boot·后端
百锦再2 天前
Django实现接口token检测的实现方案
数据库·python·django·sqlite·flask·fastapi·pip
tryCbest2 天前
数据库SQL学习
数据库·sql
jnrjian2 天前
ORA-01017 查找机器名 用户名 以及library cache lock 参数含义
数据库·oracle
十月南城2 天前
数据湖技术对比——Iceberg、Hudi、Delta的表格格式与维护策略
大数据·数据库·数据仓库·hive·hadoop·spark
Henry Zhu1232 天前
数据库:并发控制基本概念
服务器·数据库