1.整除用//代替/运算符。
2.聚合函数array_agg务必加上order by。
3.取多维数组a的一维直接用a[1]引用,而不用像PostgreSQL那样先unnest(a[1:1])再array_agg回来。
4.bit_count可以直接对整数进行二进制位计数,而不用转成bit。
5.bit(m)类型要改成bit。(未正式发布的1.5版支持bit(m)类型)
按照以上几点修改后,程宁先生的二进制求解数独SQL如下。与PostgreSQL原版相比,用时多了几十倍,DuckDB似乎对这种写法"水土不服"。
sql
--EXPLAIN (ANALYZE, TIMING ON)
WITH RECURSIVE
-- vd: lookup table pairing each digit d (1..9) with its one-bit mask
-- msk = 1 << (d - 1).
vd(d, msk) AS MATERIALIZED (
    SELECT
        CAST(t.i AS int) AS d,
        CAST(1 << (t.i - 1) AS int) AS msk
    FROM generate_series(1, 9) AS t(i)
)
-- Debug aid: SELECT * FROM vd;
,
-- cp: maps each cell position pos (1..81, row-major) to its 1-based row r,
-- column c and 3x3 box g.
cp(pos, r, c, g) AS MATERIALIZED (
    SELECT
        z.pos,
        z.row0 + 1 AS r,
        z.col0 + 1 AS c,
        (z.row0 // 3) * 3 + (z.col0 // 3) + 1 AS g
    FROM (
        -- Zero-based row and column indexes of the cell.
        SELECT
            t.pos,
            (t.pos - 1) // 9 AS row0,
            (t.pos - 1) % 9 AS col0
        FROM generate_series(1, 81) AS t(pos)
    ) AS z
),
-- pp: the puzzle(s) to solve.
--   id  puzzle id
--   p   puzzle text as supplied
--   bs  board as an int list, one element per character left after whitespace
--       is stripped and every '?' is replaced by '0'
pp(id, p, bs) AS MATERIALIZED (
    SELECT
        sudoku9_9.id,
        sudoku9_9.puzzle,
        CAST(
            regexp_split_to_array(
                regexp_replace(
                    regexp_replace(sudoku9_9.puzzle, '[\r\n\s]', '', 'g'),
                    '\?', '0', 'g'
                ),
                ''
            ) AS int[]
        ) AS bs
    FROM (
        -- Alternative input using '?' for blanks and one line per row:
        --(SELECT 3 AS id, E'????????3\n??1??56??\n?9??4??7?\n?????9?5?\n7???????8\n?5?4?2???\n?8??2??9?\n??35??1??\n6????????' AS puzzle)
        SELECT
            17 AS id,
            E'000000010400000000020000000000050407008000300001090000300400200050100000000806000' AS puzzle
    ) AS sudoku9_9
)
-- Debug aid: SELECT * FROM pp;
,
-- ss: depth-first Sudoku search expressed as a recursive CTE. Each iteration
-- turns a state that still contains empty cells into one successor state.
--   id               puzzle id
--   op               how the row was produced: 'int' (anchor row), 'df'
--                    (forced digits filled), 'gf' (guess), 'bk' (backtrack)
--   bs               board: 81 ints in row-major order, 0 = empty cell
--   rm / cm / bm     nine masks each for rows / columns / boxes; bit (d - 1)
--                    is set once digit d is used in that unit
--   bbs / brm / bcm / bbm
--                    backtracking stack: untried alternative boards and their
--                    masks, top of the stack at index 1
--   iter             number of steps taken so far
ss(id, op, bs, rm, cm, bm, bbs, brm, bcm, bbm, iter) AS (
    -- Anchor member: the parsed puzzle plus masks derived from its givens.
    SELECT
        pp.id,
        'int',
        pp.bs,
        (
            SELECT array_agg(t.m ORDER BY t.r)
            FROM (
                SELECT
                    cp.r,
                    BIT_OR(CASE WHEN pp.bs[cp.pos] > 0 THEN (1 << (pp.bs[cp.pos] - 1)) ELSE 0 END) AS m
                FROM cp
                GROUP BY cp.r
            ) AS t
        ) AS rm,
        (
            SELECT array_agg(t.m ORDER BY t.c)
            FROM (
                SELECT
                    cp.c,
                    BIT_OR(CASE WHEN pp.bs[cp.pos] > 0 THEN (1 << (pp.bs[cp.pos] - 1)) ELSE 0 END) AS m
                FROM cp
                GROUP BY cp.c
            ) AS t
        ) AS cm,
        (
            SELECT array_agg(t.m ORDER BY t.g)
            FROM (
                SELECT
                    cp.g,
                    BIT_OR(CASE WHEN pp.bs[cp.pos] > 0 THEN (1 << (pp.bs[cp.pos] - 1)) ELSE 0 END) AS m
                FROM cp
                GROUP BY cp.g
            ) AS t
        ) AS bm,
        ARRAY[]::int[][] AS bbs,
        ARRAY[]::int[][] AS brm,
        ARRAY[]::int[][] AS bcm,
        ARRAY[]::int[][] AS bbm,
        0 AS iter
    FROM pp

    UNION ALL

    -- Recursive member: advance every state that still has an empty cell.
    SELECT
        ss.id,
        n.op,
        n.bs,
        n.rm,
        n.cm,
        n.bm,
        n.bbs,
        n.brm,
        n.bcm,
        n.bbm,
        ss.iter + 1 AS iter
    FROM ss
    CROSS JOIN LATERAL (
        WITH
        -- Candidates of every empty cell: the digits (bits 0..8 of 511) not
        -- yet used in the cell's row, column or box.
        cd(pos, r, c, g, cand_mask) AS (
            SELECT
                cp.pos,
                cp.r,
                cp.c,
                cp.g,
                511 & ~(ss.rm[cp.r] | ss.cm[cp.c] | ss.bm[cp.g]) AS cand_mask
            FROM cp
            WHERE ss.bs[cp.pos] = 0
        ),
        -- Naked singles: cells whose candidate mask has exactly one bit set.
        -- Such a mask equals exactly one vd.msk, so an equi-join finds both
        -- the cells and their digit without a per-row scalar subquery.
        af(pos, r, c, g, d) AS (
            SELECT
                cd.pos,
                cd.r,
                cd.c,
                cd.g,
                vd.d
            FROM cd
            INNER JOIN vd
                ON vd.msk = cd.cand_mask
        ),
        -- Guess position: the empty cell with the fewest candidates; ties go
        -- to the row whose cells hold the most distinct values, then to the
        -- lowest row. pos is the last key so that LIMIT 1 is deterministic.
        bgp(pos) AS (
            SELECT cd.pos
            FROM cd
            ORDER BY
                bit_count(cd.cand_mask),
                -list_unique(ss.bs[(cd.r * 9 - 8) : (cd.r * 9)]),
                cd.r,
                cd.pos
            LIMIT 1
        ),
        -- Every candidate digit of the guess position.
        gv(r, c, g, d) AS (
            SELECT
                cd.r,
                cd.c,
                cd.g,
                vd.d
            FROM cd
            INNER JOIN bgp
                ON cd.pos = bgp.pos
            INNER JOIN vd
                ON (cd.cand_mask & vd.msk) > 0
        ),
        -- One alternative state per candidate digit, collected into four
        -- parallel lists. All four use the same ORDER BY so that index i
        -- refers to the same guess in each list.
        rg(bbs, brm, bcm, bbm) AS (
            SELECT
                array_agg(t.bs ORDER BY t.d),
                array_agg(t.rm ORDER BY t.d),
                array_agg(t.cm ORDER BY t.d),
                array_agg(t.bm ORDER BY t.d)
            FROM (
                SELECT
                    gv.d,
                    ss.bs[1 : (bgp.pos - 1)] || ARRAY[gv.d] || ss.bs[(bgp.pos + 1) : 81] AS bs,
                    -- ORDER BY vd.d keeps the masks indexed by unit number.
                    -- vd has no r/c/g column, so ordering by those names
                    -- would bind to the constant outer gv value and leave
                    -- the element order undefined.
                    (
                        SELECT array_agg(
                            CASE WHEN vd.d = gv.r THEN ss.rm[vd.d] | (1 << (gv.d - 1)) ELSE ss.rm[vd.d] END
                            ORDER BY vd.d
                        )
                        FROM vd
                    ) AS rm,
                    (
                        SELECT array_agg(
                            CASE WHEN vd.d = gv.c THEN ss.cm[vd.d] | (1 << (gv.d - 1)) ELSE ss.cm[vd.d] END
                            ORDER BY vd.d
                        )
                        FROM vd
                    ) AS cm,
                    (
                        SELECT array_agg(
                            CASE WHEN vd.d = gv.g THEN ss.bm[vd.d] | (1 << (gv.d - 1)) ELSE ss.bm[vd.d] END
                            ORDER BY vd.d
                        )
                        FROM vd
                    ) AS bm
                FROM bgp
                CROSS JOIN gv
            ) AS t
        ),
        -- State after filling all naked singles at once.
        adf(bs, rm, cm, bm) AS (
            SELECT
                (
                    SELECT array_agg(COALESCE(af.d, ss.bs[cp.pos]) ORDER BY cp.pos)
                    FROM cp
                    LEFT JOIN af
                        ON cp.pos = af.pos
                ) AS bs,
                (
                    SELECT array_agg(ss.rm[vd.d] | COALESCE(f.rm_upd, 0) ORDER BY vd.d)
                    FROM vd
                    LEFT JOIN (
                        SELECT af.r, BIT_OR(1 << (af.d - 1)) AS rm_upd
                        FROM af
                        GROUP BY af.r
                    ) AS f
                        ON vd.d = f.r
                ) AS rm,
                (
                    SELECT array_agg(ss.cm[vd.d] | COALESCE(f.cm_upd, 0) ORDER BY vd.d)
                    FROM vd
                    LEFT JOIN (
                        SELECT af.c, BIT_OR(1 << (af.d - 1)) AS cm_upd
                        FROM af
                        GROUP BY af.c
                    ) AS f
                        ON vd.d = f.c
                ) AS cm,
                (
                    SELECT array_agg(ss.bm[vd.d] | COALESCE(f.bm_upd, 0) ORDER BY vd.d)
                    FROM vd
                    LEFT JOIN (
                        SELECT af.g, BIT_OR(1 << (af.d - 1)) AS bm_upd
                        FROM af
                        GROUP BY af.g
                    ) AS f
                        ON vd.d = f.g
                ) AS bm
        ),
        -- Whether at least one naked single exists.
        hf(has_fills) AS (
            SELECT EXISTS (SELECT 1 FROM af) AS has_fills
        ),
        -- Conflict check; inv is true when the current state is a dead end.
        ec(inv) AS (
            SELECT
                -- Some empty cell has no candidate left.
                EXISTS (SELECT 1 FROM cd WHERE cd.cand_mask = 0)
                -- Two naked singles in the same row, column or box share a
                -- digit: fewer distinct digits than cells in that unit.
                OR EXISTS (
                    SELECT 1 FROM af GROUP BY af.r
                    HAVING bit_count(BIT_OR(1 << (af.d - 1))) < COUNT(*)
                )
                OR EXISTS (
                    SELECT 1 FROM af GROUP BY af.c
                    HAVING bit_count(BIT_OR(1 << (af.d - 1))) < COUNT(*)
                )
                OR EXISTS (
                    SELECT 1 FROM af GROUP BY af.g
                    HAVING bit_count(BIT_OR(1 << (af.d - 1))) < COUNT(*)
                )
        )
        -- The three branches below are mutually exclusive, so each state
        -- yields exactly one successor.
        -- Fill: naked singles exist and do not conflict.
        SELECT
            'df' AS op,
            adf.bs,
            adf.rm,
            adf.cm,
            adf.bm,
            ss.bbs,
            ss.brm,
            ss.bcm,
            ss.bbm
        FROM hf
        CROSS JOIN ec
        CROSS JOIN adf
        WHERE hf.has_fills AND NOT ec.inv

        UNION ALL

        -- Guess: no naked single and no dead end. Continue with the first
        -- alternative and push the others onto the backtracking stack.
        -- Without the NOT ec.inv guard a cell with zero candidates would make
        -- gv empty and this branch would emit an all-NULL row next to the
        -- backtrack row.
        SELECT
            'gf' AS op,
            rg.bbs[1],
            rg.brm[1],
            rg.bcm[1],
            rg.bbm[1],
            array_cat(rg.bbs[2:], ss.bbs),
            array_cat(rg.brm[2:], ss.brm),
            array_cat(rg.bcm[2:], ss.bcm),
            array_cat(rg.bbm[2:], ss.bbm)
        FROM hf
        CROSS JOIN ec
        CROSS JOIN rg
        WHERE NOT hf.has_fills AND NOT ec.inv

        UNION ALL

        -- Backtrack: dead end, so pop the most recent alternative. With an
        -- empty stack the popped board is NULL, which fails the recursion
        -- filter below and ends the search.
        SELECT
            'bk' AS op,
            ss.bbs[1],
            ss.brm[1],
            ss.bcm[1],
            ss.bbm[1],
            ss.bbs[2:],
            ss.brm[2:],
            ss.bcm[2:],
            ss.bbm[2:]
        FROM ec
        WHERE ec.inv
    ) n
    -- Only boards that still contain an empty cell (0) are expanded.
    WHERE ss.bs @> ARRAY[0]
)
--select op,count(1) from ss group by op
--select * from ss
-- Report every solved board (a state with no 0 left) next to its puzzle text.
-- The board is rendered as text with a newline before cells 10, 19, 28, ...
-- ORDER BY e.pos makes the digit order explicit; without it the list built by
-- the ARRAY subquery has no guaranteed order.
SELECT
    s.id,
    pp.p AS puzzle,
    array_to_string(
        ARRAY(
            SELECT CASE WHEN e.pos % 9 = 1 AND e.pos > 1 THEN E'\n' ELSE '' END || e.v::text
            FROM unnest(s.bs) WITH ORDINALITY AS e(v, pos)
            ORDER BY e.pos
        ),
        ''
    ) AS results
FROM ss AS s
INNER JOIN pp
    ON s.id = pp.id
WHERE NOT s.bs @> ARRAY[0];