SQLite文档中提供了一个求解数独的SQL,我把它用于一道高难度的数独题:
'.....1..7....6..2.8..9..3...954....3..3...4..4......8......7..6.1..2....5..3..9..',
结果用时10秒。同样的语句,只把除法符号/替换成整除符号//,在DuckDB中运行反而更慢,要13秒,单线程和多线程的用时差不多。
原SQL是基于字符串查找操作的,我想能否用二进制位操作提高效率,把如下需求提交给DeepSeek。
用如下思路编写计算数独的SQL程序,将已知数字字符串计算成9行9列9个宫共27个二进制数放在列表里,比如某行有1 3 5,那么二进制就是
2^1+2^3+2^5,每尝试一个新格子用bitand已有2进制去判断其所在行列宫是否允许放置,以下是用instr判断行列宫是否出现的代码,供参考。
sql
-- String-based Sudoku solver written as a recursive CTE (DuckDB flavour: `//` is integer division).
-- The board is an 81-character string in row-major order; '.' marks an empty cell.
WITH RECURSIVE
-- The puzzle to solve.
input(sud) AS (
    VALUES ('.....1..7....6..2.8..9..3...954....3..3...4..4......8......7..6.1..2....5..3..9..')
),
-- The digits 1..9, as text (z) and as integer (lp).
digits(z, lp) AS (
    VALUES ('1', 1)
    UNION ALL
    SELECT
        CAST(lp + 1 AS TEXT),
        lp + 1
    FROM digits
    WHERE lp < 9
),
-- s: board filled in so far; ind: 1-based position of its first '.' (0 once the board is full).
x(s, ind) AS (
    SELECT
        sud,
        instr(sud, '.')
    FROM input
    UNION ALL
    SELECT
        substr(s, 1, ind - 1) || cand.z || substr(s, ind + 1),
        instr(substr(s, 1, ind - 1) || cand.z || substr(s, ind + 1), '.')
    FROM x
    CROSS JOIN digits AS cand
    WHERE ind > 0
      -- Reject the candidate digit if it already appears in the same row, column or box.
      -- probe.lp walks the 9 cells of each of those three units.
      AND NOT EXISTS (
          SELECT 1
          FROM digits AS probe
          WHERE cand.z IN (
              -- cell number probe.lp of the current row
              substr(s, ((ind - 1) // 9) * 9 + probe.lp, 1),
              -- cell number probe.lp of the current column
              substr(s, ((ind - 1) % 9) + (probe.lp - 1) * 9 + 1, 1),
              -- cell number probe.lp of the current 3x3 box
              substr(s, (((ind - 1) // 3) % 3) * 3
                        + ((ind - 1) // 27) * 27 + probe.lp
                        + ((probe.lp - 1) // 3) * 6, 1)
          )
      )
)
SELECT s
FROM x
WHERE ind = 0;
一开始没提DuckDB,结果他改写出来的程序是用于SQLite的,结果也不对,于是继续提示
我用duckdb,它支持数组访问,比如a[1],a[27]。请用duckdb语法改写简洁版,以已知字符串初始化,不做别的
结果它给出了如下正确的SQL。令人惊叹的是,它用了那么多常量,居然全都正确,可见完全理解了需求;标量子查询也用得炉火纯青。
sql
-- Bitmask-based Sudoku solver for DuckDB (recursive CTE).
-- The board is an 81-character string in row-major order; '.' marks an empty cell.
-- For every row, column and 3x3 box a bitmask records which digits are already used
-- (digit d <-> bit value 2^(d-1)); a candidate is legal when its bit is clear in all three masks.
WITH RECURSIVE
input(sud) AS (
VALUES('.....1..7....6..2.8..9..3...954....3..3...4..4......8......7..6.1..2....5..3..9..')
),
-- z: digit as text, lp: digit as integer, bit: mask value of the digit (1, 2, 4, ..., 256).
digits(z, lp, bit) AS (
VALUES('1', 1, 1)
UNION ALL SELECT
CAST(lp+1 AS TEXT), lp+1, bit * 2 FROM digits WHERE lp<9
),
-- Initialise the row / column / box bitmasks from the given digits.
-- SUM over zero rows is NULL, and (NULL & bit) = 0 is never true, so a unit without any
-- given digit would block every placement in it; COALESCE turns that case into mask 0.
init_state AS (
SELECT
sud,
-- Row masks: rows[r] covers the 9 consecutive characters of row r.
[
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 1, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 10, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 19, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 28, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 37, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 46, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 55, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 64, 9), z) > 0),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE instr(substr(sud, 73, 9), z) > 0)
] as rows,
-- Column masks: cols[c] covers positions c, c+9, ..., c+72.
[
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 1, 1) = z OR substr(sud, 10, 1) = z OR substr(sud, 19, 1) = z OR substr(sud, 28, 1) = z OR substr(sud, 37, 1) = z OR substr(sud, 46, 1) = z OR substr(sud, 55, 1) = z OR substr(sud, 64, 1) = z OR substr(sud, 73, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 2, 1) = z OR substr(sud, 11, 1) = z OR substr(sud, 20, 1) = z OR substr(sud, 29, 1) = z OR substr(sud, 38, 1) = z OR substr(sud, 47, 1) = z OR substr(sud, 56, 1) = z OR substr(sud, 65, 1) = z OR substr(sud, 74, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 3, 1) = z OR substr(sud, 12, 1) = z OR substr(sud, 21, 1) = z OR substr(sud, 30, 1) = z OR substr(sud, 39, 1) = z OR substr(sud, 48, 1) = z OR substr(sud, 57, 1) = z OR substr(sud, 66, 1) = z OR substr(sud, 75, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 4, 1) = z OR substr(sud, 13, 1) = z OR substr(sud, 22, 1) = z OR substr(sud, 31, 1) = z OR substr(sud, 40, 1) = z OR substr(sud, 49, 1) = z OR substr(sud, 58, 1) = z OR substr(sud, 67, 1) = z OR substr(sud, 76, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 5, 1) = z OR substr(sud, 14, 1) = z OR substr(sud, 23, 1) = z OR substr(sud, 32, 1) = z OR substr(sud, 41, 1) = z OR substr(sud, 50, 1) = z OR substr(sud, 59, 1) = z OR substr(sud, 68, 1) = z OR substr(sud, 77, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 6, 1) = z OR substr(sud, 15, 1) = z OR substr(sud, 24, 1) = z OR substr(sud, 33, 1) = z OR substr(sud, 42, 1) = z OR substr(sud, 51, 1) = z OR substr(sud, 60, 1) = z OR substr(sud, 69, 1) = z OR substr(sud, 78, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 7, 1) = z OR substr(sud, 16, 1) = z OR substr(sud, 25, 1) = z OR substr(sud, 34, 1) = z OR substr(sud, 43, 1) = z OR substr(sud, 52, 1) = z OR substr(sud, 61, 1) = z OR substr(sud, 70, 1) = z OR substr(sud, 79, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 8, 1) = z OR substr(sud, 17, 1) = z OR substr(sud, 26, 1) = z OR substr(sud, 35, 1) = z OR substr(sud, 44, 1) = z OR substr(sud, 53, 1) = z OR substr(sud, 62, 1) = z OR substr(sud, 71, 1) = z OR substr(sud, 80, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 9, 1) = z OR substr(sud, 18, 1) = z OR substr(sud, 27, 1) = z OR substr(sud, 36, 1) = z OR substr(sud, 45, 1) = z OR substr(sud, 54, 1) = z OR substr(sud, 63, 1) = z OR substr(sud, 72, 1) = z OR substr(sud, 81, 1) = z)
] as cols,
-- Box masks: boxes[b] covers the 3x3 box b, numbered left-to-right, top-to-bottom.
[
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 1, 1) = z OR substr(sud, 2, 1) = z OR substr(sud, 3, 1) = z OR substr(sud, 10, 1) = z OR substr(sud, 11, 1) = z OR substr(sud, 12, 1) = z OR substr(sud, 19, 1) = z OR substr(sud, 20, 1) = z OR substr(sud, 21, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 4, 1) = z OR substr(sud, 5, 1) = z OR substr(sud, 6, 1) = z OR substr(sud, 13, 1) = z OR substr(sud, 14, 1) = z OR substr(sud, 15, 1) = z OR substr(sud, 22, 1) = z OR substr(sud, 23, 1) = z OR substr(sud, 24, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 7, 1) = z OR substr(sud, 8, 1) = z OR substr(sud, 9, 1) = z OR substr(sud, 16, 1) = z OR substr(sud, 17, 1) = z OR substr(sud, 18, 1) = z OR substr(sud, 25, 1) = z OR substr(sud, 26, 1) = z OR substr(sud, 27, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 28, 1) = z OR substr(sud, 29, 1) = z OR substr(sud, 30, 1) = z OR substr(sud, 37, 1) = z OR substr(sud, 38, 1) = z OR substr(sud, 39, 1) = z OR substr(sud, 46, 1) = z OR substr(sud, 47, 1) = z OR substr(sud, 48, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 31, 1) = z OR substr(sud, 32, 1) = z OR substr(sud, 33, 1) = z OR substr(sud, 40, 1) = z OR substr(sud, 41, 1) = z OR substr(sud, 42, 1) = z OR substr(sud, 49, 1) = z OR substr(sud, 50, 1) = z OR substr(sud, 51, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 34, 1) = z OR substr(sud, 35, 1) = z OR substr(sud, 36, 1) = z OR substr(sud, 43, 1) = z OR substr(sud, 44, 1) = z OR substr(sud, 45, 1) = z OR substr(sud, 52, 1) = z OR substr(sud, 53, 1) = z OR substr(sud, 54, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 55, 1) = z OR substr(sud, 56, 1) = z OR substr(sud, 57, 1) = z OR substr(sud, 64, 1) = z OR substr(sud, 65, 1) = z OR substr(sud, 66, 1) = z OR substr(sud, 73, 1) = z OR substr(sud, 74, 1) = z OR substr(sud, 75, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 58, 1) = z OR substr(sud, 59, 1) = z OR substr(sud, 60, 1) = z OR substr(sud, 67, 1) = z OR substr(sud, 68, 1) = z OR substr(sud, 69, 1) = z OR substr(sud, 76, 1) = z OR substr(sud, 77, 1) = z OR substr(sud, 78, 1) = z),
(SELECT COALESCE(SUM(bit), 0) FROM digits WHERE substr(sud, 61, 1) = z OR substr(sud, 62, 1) = z OR substr(sud, 63, 1) = z OR substr(sud, 70, 1) = z OR substr(sud, 71, 1) = z OR substr(sud, 72, 1) = z OR substr(sud, 79, 1) = z OR substr(sud, 80, 1) = z OR substr(sud, 81, 1) = z)
] as boxes
FROM input
),
-- Recursive search.
-- s: board filled in so far; ind: 1-based position of its first '.' (0 once the board is full);
-- rows / cols / boxes: the bitmask lists matching s.
x(s, ind, rows, cols, boxes) AS (
SELECT
sud,
instr(sud, '.'),
rows,
cols,
boxes
FROM init_state
UNION ALL
SELECT
substr(s, 1, ind-1) || z.z || substr(s, ind+1),
instr( substr(s, 1, ind-1) || z.z || substr(s, ind+1), '.' ),
-- Set the digit's bit in the mask of the current row (1-based index (ind-1)//9 + 1).
list_transform(rows, (val, idx) ->
CASE WHEN idx = ((ind-1)//9) + 1 THEN val | z.bit ELSE val END),
-- Set the digit's bit in the mask of the current column (1-based index (ind-1)%9 + 1).
list_transform(cols, (val, idx) ->
CASE WHEN idx = ((ind-1)%9) + 1 THEN val | z.bit ELSE val END),
-- Set the digit's bit in the mask of the current box (band * 3 + stack + 1).
list_transform(boxes, (val, idx) ->
CASE WHEN idx = (((ind-1)//27)*3 + ((ind-1)%9)//3) + 1 THEN val | z.bit ELSE val END)
FROM x
CROSS JOIN digits AS z
WHERE ind>0
-- The digit may be placed only if its bit is clear in the row, column and box masks.
AND (rows[((ind-1)//9) + 1] & z.bit) = 0
AND (cols[((ind-1)%9) + 1] & z.bit) = 0
AND (boxes[(((ind-1)//27)*3 + ((ind-1)%9)//3) + 1] & z.bit) = 0
)
SELECT s FROM x WHERE ind=0;
运行效率也很高,只要1.5秒。
我参考最后那段使用位运算的判断条件,把init_state改写成如下这样:结果一样,用时基本不变,扩展性也好一些,可用于四阶、五阶等数独。
sql
-- Initialise the row / column / box bitmasks from the given digits.
-- Every (digit, cell) pair is aggregated and contributes the digit's bit only when the cell
-- holds that digit, so each unit always yields exactly one list entry (0 when it has no given
-- digit). Filtering before grouping would drop such units and shift the later masks to the
-- wrong list index.
init_state AS (
SELECT
sud,
-- Row masks: cell i (0-based) belongs to row i//9.
(select list(mask order by rn) from (
    select i//9 as rn, sum(case when substr(sud,i+1,1)=z then bit else 0 end) as mask
    from digits cross join range(0,81) t(i)
    group by i//9)) as rows,
-- Column masks: cell i belongs to column i%9.
(select list(mask order by rn) from (
    select i%9 as rn, sum(case when substr(sud,i+1,1)=z then bit else 0 end) as mask
    from digits cross join range(0,81) t(i)
    group by i%9)) as cols,
-- Box masks: cell i belongs to box i//27*3 + i%9//3.
(select list(mask order by rn) from (
    select (i//27*3 + i%9//3) as rn, sum(case when substr(sud,i+1,1)=z then bit else 0 end) as mask
    from digits cross join range(0,81) t(i)
    group by (i//27*3 + i%9//3))) as boxes
FROM input
),
DeepSeek另一个巧妙的点是利用list_transform更新数组,很直观,我只会写数组切片再合并的代码,比如用
rows[:(ind-1)//9]+[rows[(ind-1)//9 + 1]| z.bit]+rows[(ind-1)//9+ 2:]代替
sql
-- Update the row bitmasks: OR the placed digit's bit (z.bit) into the entry whose 1-based
-- index is ((ind-1)//9) + 1, i.e. the row of cell ind; all other entries pass through unchanged.
list_transform(rows, (val, idx) ->
CASE WHEN idx = ((ind-1)//9) + 1 THEN val | z.bit ELSE val END),
经过测试,两种写法的效率基本相同。
回头看他一开始写的SQLite语句,也是用与我差不多的思路初始化二进制,但是用了复杂的相关子查询,并且写得不对,导致求解失败。
在Windows上发现一件离奇的事:DeepSeek写的上述DuckDB代码运行很慢,大约4秒,而我改后的只要0.7秒;而在Linux上两者用时基本一致。从DeepSeek代码的执行计划看,Windows和Linux也没什么区别,真是令人费解。