将PostgreSQL的SQL改写成DuckDB的步骤

1.整除用//代替/运算符。

2.聚合函数array_agg务必加上order by,否则DuckDB不保证数组中元素的顺序。

3.取多维数组a的一维直接用a[1]引用,而不用像PostgreSQL那样先unnest(a[1:1])再array_agg回来。

4.bit_count可以直接对整数进行二进制位计数,而不用转成bit。

5.bit(m)类型要改成bit。(未正式发布的1.5版支持bit(m)类型)

按照以上步骤改写后的程宁先生的二进制数独求解SQL如下。与PostgreSQL原版相比,用时多了几十倍,DuckDB似乎对此"水土不服"。

sql 复制代码
--EXPLAIN (ANALYZE, TIMING ON) 
WITH RECURSIVE
-- vd: digit lookup table. One row per digit d in 1..9 together with its
-- single-bit mask msk = 1 << (d - 1).
vd(d, msk) AS MATERIALIZED (
    SELECT
        CAST(i AS int),
        CAST(1 << (i - 1) AS int)
    FROM generate_series(1, 9) AS t(i)
)
--select * from vd;
,
-- cp: geometry of the 9x9 grid. For every cell position pos (1..81, row-major)
-- gives its 1-based row r, column c and 3x3 box g.
cp(pos, r, c, g) AS MATERIALIZED (
    SELECT
        z + 1 AS pos,
        (z // 9) + 1 AS r,
        (z % 9) + 1 AS c,
        (z // 9) // 3 * 3 + (z % 9) // 3 + 1 AS g
    -- z is the zero-based cell index, i.e. pos - 1
    FROM generate_series(0, 80) AS t(z)
),
-- pp: the puzzle to solve. Keeps the raw text as p and converts it to bs, an
-- int array with one element per character, after removing CR/LF/whitespace
-- and replacing every '?' with '0'.
pp(id, p, bs) AS MATERIALIZED (
    SELECT
        id,
        puzzle,
        regexp_split_to_array(
            regexp_replace(
                regexp_replace(puzzle, '[\r\n\s]', '', 'g'),
                '\?', '0', 'g'
            ),
            ''
        )::int[] AS bs
    FROM
    -- Alternative input using '?' for empty cells and one line per row:
    --(SELECT 3 AS id, E'????????3\n??1??56??\n?9??4??7?\n?????9?5?\n7???????8\n?5?4?2???\n?8??2??9?\n??35??1??\n6????????' AS puzzle)
    (
        SELECT
            17 AS id,
            E'000000010400000000020000000000050407008000300001090000300400200050100000000806000' AS puzzle
    ) AS sudoku9_9
)
--select * from pp;


,
-- ss: recursive solver state; each recursion step appends one row per live state.
--   op              : which step produced the row: 'int' (seed row), 'df' (forced
--                     single-candidate fills), 'gf' (guess), 'bk' (backtrack)
--   bs              : the 81 cell values, 0 = empty
--   rm / cm / bm    : per-row / per-column / per-box bit masks of digits already
--                     placed (bit d-1 set means digit d is used)
--   bbs/brm/bcm/bbm : stack of alternative (bs, rm, cm, bm) states saved by earlier
--                     guesses; index 1 is the next state to try on backtrack
--   iter            : step counter
-- Recursion continues only from rows whose bs still contains a 0.
ss(id, op, bs, rm, cm, bm, bbs, brm, bcm, bbm, iter) AS (
    -- Seed: build the three mask arrays from the givens of the puzzle.
    SELECT
        id,
        'int',
        bs,
        (
            SELECT array_agg(m ORDER BY r)
            FROM (
                SELECT r, BIT_OR(CASE WHEN bs[cp.pos] > 0 THEN (1 << (bs[cp.pos] - 1)) ELSE 0 END) AS m
                FROM cp
                GROUP BY r
            ) AS t
        ) AS rm,
        (
            SELECT array_agg(m ORDER BY c)
            FROM (
                SELECT c, BIT_OR(CASE WHEN bs[cp.pos] > 0 THEN (1 << (bs[cp.pos] - 1)) ELSE 0 END) AS m
                FROM cp
                GROUP BY c
            ) AS t
        ) AS cm,
        (
            SELECT array_agg(m ORDER BY g)
            FROM (
                SELECT g, BIT_OR(CASE WHEN bs[cp.pos] > 0 THEN (1 << (bs[cp.pos] - 1)) ELSE 0 END) AS m
                FROM cp
                GROUP BY g
            ) AS t
        ) AS bm,
        ARRAY[]::int[][] AS bbs,
        ARRAY[]::int[][] AS brm,
        ARRAY[]::int[][] AS bcm,
        ARRAY[]::int[][] AS bbm,
        0 AS iter
    FROM pp
    UNION ALL
    SELECT ss.id, n.op, n.bs, n.rm, n.cm, n.bm, n.bbs, n.brm, n.bcm, n.bbm, ss.iter + 1 AS iter
    FROM ss
    CROSS JOIN LATERAL (
        WITH
        -- Candidate mask of every empty cell: the digits (bits 0..8, hence 511)
        -- not yet used in the cell's row, column or box.
        cd(pos, r, c, g, cand_mask) AS (
            SELECT
                cp.pos, cp.r, cp.c, cp.g,
                511 & ~(ss.rm[cp.r] | ss.cm[cp.c] | ss.bm[cp.g]) AS cand_mask
            FROM cp
            WHERE ss.bs[cp.pos] = 0
        ),
        -- Naked singles: cells whose candidate mask has exactly one bit set;
        -- d is the digit that bit stands for.
        af(pos, r, c, g, d) AS (
            SELECT
                pos, r, c, g,
                (SELECT vd.d::int FROM vd WHERE (cd.cand_mask & vd.msk) > 0)
            FROM cd
            WHERE bit_count(cd.cand_mask) = 1
        ),
        -- Cell to guess on: fewest candidates first, then the largest
        -- list_unique() of the cell's row slice, then the lowest row number.
        -- pos is the final tie-breaker so the pick is deterministic.
        bgp(pos) AS (
            SELECT pos
            FROM cd
            ORDER BY
                bit_count(cand_mask),
                -list_unique(ss.bs[(((pos-1)//9)+1)*9-8:(((pos-1)//9)+1)*9]),
                (((pos-1)//9)+1),
                pos
            LIMIT 1
        ),
        -- All candidate digits of the chosen cell.
        gv(r, c, g, d) AS (
            SELECT cd.r, cd.c, cd.g, vd.d
            FROM cd
            JOIN bgp ON cd.pos = bgp.pos
            JOIN vd ON (cd.cand_mask & vd.msk) > 0
        ),
        -- One successor state per candidate digit, collected into four parallel
        -- arrays. Every array_agg is ordered by the guessed digit so that element
        -- i of bbs, brm, bcm and bbm always describes the same state.
        rg(bbs, brm, bcm, bbm) AS (
            SELECT
                array_agg(t.bs ORDER BY t.d),
                array_agg(t.rm ORDER BY t.d),
                array_agg(t.cm ORDER BY t.d),
                array_agg(t.bm ORDER BY t.d)
            FROM (
                SELECT
                    gv.d AS d,
                    ss.bs[1 : (bgp.pos - 1)] || ARRAY[gv.d] || ss.bs[(bgp.pos + 1) : 81] AS bs,
                    -- The masks are rebuilt element by element in index order
                    -- (vd.d = 1..9); only the entry of the guessed cell's
                    -- row / column / box gains the new digit's bit.
                    (SELECT array_agg(CASE WHEN vd.d = gv.r THEN ss.rm[vd.d] | (1 << (gv.d - 1)) ELSE ss.rm[vd.d] END ORDER BY vd.d) FROM vd) AS rm,
                    (SELECT array_agg(CASE WHEN vd.d = gv.c THEN ss.cm[vd.d] | (1 << (gv.d - 1)) ELSE ss.cm[vd.d] END ORDER BY vd.d) FROM vd) AS cm,
                    (SELECT array_agg(CASE WHEN vd.d = gv.g THEN ss.bm[vd.d] | (1 << (gv.d - 1)) ELSE ss.bm[vd.d] END ORDER BY vd.d) FROM vd) AS bm
                FROM bgp
                CROSS JOIN gv
            ) AS t
        ),
        -- State after applying all naked singles at once.
        adf(bs, rm, cm, bm) AS (
            SELECT
                (SELECT array_agg(coalesce(af.d, ss.bs[cp.pos]) ORDER BY cp.pos) FROM cp LEFT JOIN af ON cp.pos = af.pos) AS bs,
                (SELECT array_agg(ss.rm[vd.d] | COALESCE(f.rm_upd, 0) ORDER BY vd.d) FROM vd LEFT JOIN (SELECT af.r, BIT_OR(1 << (af.d - 1)) AS rm_upd FROM af GROUP BY af.r) AS f ON vd.d = f.r) AS rm,
                (SELECT array_agg(ss.cm[vd.d] | COALESCE(f.cm_upd, 0) ORDER BY vd.d) FROM vd LEFT JOIN (SELECT af.c, BIT_OR(1 << (af.d - 1)) AS cm_upd FROM af GROUP BY af.c) AS f ON vd.d = f.c) AS cm,
                (SELECT array_agg(ss.bm[vd.d] | COALESCE(f.bm_upd, 0) ORDER BY vd.d) FROM vd LEFT JOIN (SELECT af.g, BIT_OR(1 << (af.d - 1)) AS bm_upd FROM af GROUP BY af.g) AS f ON vd.d = f.g) AS bm
        ),
        -- TRUE when at least one naked single exists.
        hf(has_fills) AS (
            SELECT EXISTS (SELECT 1 FROM af LIMIT 1) AS has_fills
        ),
        -- Conflict check: TRUE when the current state is a dead end.
        ec(inv) AS (
            SELECT
                -- some empty cell has no candidate left
                EXISTS (SELECT 1 FROM cd WHERE cand_mask = 0 LIMIT 1)
                -- or two naked singles in the same row / column / box want the
                -- same digit (fewer distinct bits than cells)
                OR EXISTS (
                    SELECT 1
                    FROM (SELECT r, COUNT(*) AS unique_count FROM af GROUP BY r HAVING bit_count(BIT_OR(1 << (af.d - 1))) < COUNT(*)) AS row_conflicts
                    LIMIT 1
                )
                OR EXISTS (
                    SELECT 1
                    FROM (SELECT c, COUNT(*) AS unique_count FROM af GROUP BY c HAVING bit_count(BIT_OR(1 << (af.d - 1))) < COUNT(*)) AS col_conflicts
                    LIMIT 1
                )
                OR EXISTS (
                    SELECT 1
                    FROM (SELECT g, COUNT(*) AS unique_count FROM af GROUP BY g HAVING bit_count(BIT_OR(1 << (af.d - 1))) < COUNT(*)) AS box_conflicts
                    LIMIT 1
                )
        )
        -- Forced fills: apply every naked single; the backtrack stack is unchanged.
        SELECT 'df' AS op, adf.bs, adf.rm, adf.cm, adf.bm, ss.bbs, ss.brm, ss.bcm, ss.bbm
        FROM hf
        CROSS JOIN ec
        CROSS JOIN adf
        WHERE hf.has_fills AND NOT ec.inv
        UNION ALL
        -- Guess: continue with the first candidate state and push the remaining
        -- ones onto the backtrack stack. Skipped on a dead end, where only the
        -- backtrack branch below applies.
        SELECT 'gf' AS op,
            rg.bbs[1],
            rg.brm[1],
            rg.bcm[1],
            rg.bbm[1],
            array_cat(rg.bbs[2:], ss.bbs),
            array_cat(rg.brm[2:], ss.brm),
            array_cat(rg.bcm[2:], ss.bcm),
            array_cat(rg.bbm[2:], ss.bbm)
        FROM hf
        CROSS JOIN ec
        CROSS JOIN rg
        WHERE NOT hf.has_fills AND NOT ec.inv
        UNION ALL
        -- Backtrack: on a dead end, pop the most recently saved alternative state.
        SELECT 'bk' AS op,
            ss.bbs[1],
            ss.brm[1],
            ss.bcm[1],
            ss.bbm[1],
            ss.bbs[2:],
            ss.brm[2:],
            ss.bcm[2:],
            ss.bbm[2:]
        FROM ec
        WHERE ec.inv
    ) AS n
    -- Only states that still contain an empty cell are expanded further.
    WHERE ss.bs @> ARRAY[0]
)
--select op,count(1) from ss group by op
--select * from ss
-- Output: every state of ss whose grid no longer contains a 0, rendered as text
-- with a line break before cells 10, 19, ... (the first cell of rows 2..9).
-- The characters are aggregated with an explicit ORDER BY on the unnest
-- ordinality so the cell order of the rendered grid is guaranteed.
SELECT
    s.id,
    pp.p AS puzzle,
    array_to_string(
        (
            SELECT array_agg(
                CASE WHEN pos % 9 = 1 AND pos > 1 THEN E'\n' ELSE '' END || v::text
                ORDER BY pos
            )
            FROM unnest(s.bs) WITH ORDINALITY AS e(v, pos)
        ),
        ''
    ) AS results
FROM ss AS s
INNER JOIN pp ON s.id = pp.id
WHERE NOT s.bs @> ARRAY[0];
相关推荐
李广坤2 小时前
MySQL 大表字段变更实践(改名 + 改类型 + 改长度)
数据库
爱可生开源社区1 天前
2026 年,优秀的 DBA 需要具备哪些素质?
数据库·人工智能·dba
随逸1771 天前
《从零搭建NestJS项目》
数据库·typescript
加号32 天前
windows系统下mysql多源数据库同步部署
数据库·windows·mysql
シ風箏2 天前
MySQL【部署 04】Docker部署 MySQL8.0.32 版本(网盘镜像及启动命令分享)
数据库·mysql·docker
李慕婉学姐2 天前
Springboot智慧社区系统设计与开发6n99s526(程序+源码+数据库+调试部署+开发环境)带论文文档1万字以上,文末可获取,系统界面在最后面。
数据库·spring boot·后端
百锦再2 天前
Django实现接口token检测的实现方案
数据库·python·django·sqlite·flask·fastapi·pip
tryCbest2 天前
数据库SQL学习
数据库·sql
jnrjian2 天前
ORA-01017 查找机器名 用户名 以及library cache lock 参数含义
数据库·oracle
十月南城2 天前
数据湖技术对比——Iceberg、Hudi、Delta的表格格式与维护策略
大数据·数据库·数据仓库·hive·hadoop·spark