1.张泽鹏先生的SQL, 处理带换行符的输入文件
sql
-- For every interior tree of a digit grid (one grid row per text line), walk
-- up, down, left and right one cell at a time, counting cells until a tree
-- that is not lower is met or the border is reached. The four counts are
-- multiplied and the tree with the largest product is returned
-- (ties broken by row_id, then col_id).
with recursive heights as (
    -- Whole file -> list of rows, each row a list of single-character strings.
    select
        [
            string_split(line, '')
            for line in string_split(trim(content, E'\n'), E'\n')
        ] as matrix
    from read_text('1000x1000.txt')
),
lines as (
    -- One record per grid row; row_id is the list subscript of that row.
    select
        generate_subscripts(matrix, 1) as row_id,
        unnest(matrix) as line
    from heights
),
cells as materialized (
    -- One record per tree. bound is the largest row_id and is reused below as
    -- the limit for columns too, i.e. the grid is treated as square.
    -- height stays the split character (no numeric cast is applied).
    select
        max(row_id) over () as bound,
        row_id,
        generate_subscripts(line, 1) as col_id,
        unnest(line) as height
    from lines
),
matrix as (
    -- Seed state: every tree paired with each of the four directions
    -- (0 = up, 1 = down, 2 = left, 3 = right). has_more is true only for
    -- trees strictly inside the border.
    select
        cells.*,
        directory,
        0 as score,
        row_id > 1 and col_id > 1 and row_id < bound and col_id < bound as has_more,
        row_id as next_row,
        col_id as next_col
    from cells
    cross join range(4) as directories(directory)
),
scenes as (
    -- Anchor: interior trees only, with the cursor moved one step away from
    -- the tree in its direction.
    select
        bound,
        row_id,
        col_id,
        height,
        directory,
        score,
        has_more,
        case
            when directory = 0 then next_row - 1 -- up
            when directory = 1 then next_row + 1 -- down
            else next_row
        end as next_row,
        case
            when directory = 2 then next_col - 1 -- left
            when directory = 3 then next_col + 1 -- right
            else next_col
        end as next_col
    from matrix
    where has_more
    union all
    -- Step: look up the cell under the cursor and count it. The walk goes on
    -- only while that cell is strictly lower than the origin tree and is not
    -- on the border; the cursor is then advanced one more step.
    select
        scenes.bound,
        scenes.row_id,
        scenes.col_id,
        scenes.height,
        scenes.directory,
        scenes.score + 1 as score,
        scenes.has_more
            and scenes.height > cells.height
            and scenes.next_row > 1
            and scenes.next_col > 1
            and scenes.next_row < scenes.bound
            and scenes.next_col < scenes.bound as has_more,
        case
            when scenes.directory = 0 then scenes.next_row - 1 -- up
            when scenes.directory = 1 then scenes.next_row + 1 -- down
            else scenes.next_row
        end as next_row,
        case
            when scenes.directory = 2 then scenes.next_col - 1 -- left
            when scenes.directory = 3 then scenes.next_col + 1 -- right
            else scenes.next_col
        end as next_col
    from scenes
    inner join cells
        on scenes.next_row = cells.row_id
        and scenes.next_col = cells.col_id
    where scenes.has_more
),
scores as (
    -- The rows where has_more turned false carry the finished count of each
    -- walk; multiply them per tree.
    select
        row_id,
        col_id,
        product(score) as score
    from scenes
    where not has_more
    group by
        row_id,
        col_id
)
select
    row_id,
    col_id,
    score
from scores
order by
    score desc,
    row_id,
    col_id
limit 1;
如果要处理不带换行符的文件,把 matrix 之前的几个 CTE(heights、lines、cells)改成如下写法,用 row_number 代替 generate_subscripts:
sql
-- Replacement for the CTEs that precede "matrix" when the input file holds
-- the grid as one unbroken run of digits. This is a fragment: it ends with a
-- comma and must be followed by the "matrix as (...)" CTE and the rest of the
-- recursive query.
with recursive t(n, t) as (
    -- n is the hard-coded side length of the square grid in the file.
    -- Alternative input named in the original: '2208-input.txt'.
    select 300, content from read_text('90000.txt.csv')
),
b as (
    -- One record per digit. rn numbers the digits in the order they arrive
    -- from unnest (there is no ORDER BY); r and c are the 1-based row and
    -- column derived from rn and n.
    select
        row_number() over () as rn,
        n as total_r,
        n as total_c,
        (rn - 1) // n + 1 as r,
        (rn - 1) % n + 1 as c,
        b::int as h
    from (
        select n, unnest(string_split(replace(t, chr(10), ''), '')) as b
        from t
    )
),
cells as materialized (
    -- bound must be the largest row index, matching max(row_id) in the
    -- generate_subscripts variant. max(rn) would be the total cell count
    -- (n * n), which makes border trees look interior and lets walks step
    -- off the grid, where the inner join drops them without a final row.
    select
        max(r) over () as bound,
        r as row_id,
        c as col_id,
        h as height
    from b
),
- newkid版本1
sql
-- newkid version 1: self-join every tree with the other trees in its row and
-- column, and locate the nearest tree that is at least as tall in each of the
-- four directions via conditional min/max over positions.
with src(n, txt) as (
    -- n is the hard-coded side length of the square grid in the file.
    -- Alternative input named in the original: '2208-input.txt'.
    select 300, content from read_text('90000.txt.csv')
),
grid as (
    -- One record per digit; r and c are the 1-based row and column derived
    -- from the running number rn (assigned in unnest arrival order).
    select
        row_number() over () as rn,
        n as total_r,
        n as total_c,
        (rn - 1) // n + 1 as r,
        (rn - 1) % n + 1 as c,
        digit::int as h
    from (
        select n, unnest(string_split(replace(txt, chr(10), ''), '')) as digit
        from src
    )
),
scored as (
    -- Each factor is the distance to the nearest blocker in one direction,
    -- falling back to the grid edge (total_c / total_r / 1) when none exists.
    select
        cur.r,
        cur.c,
        cur.h,
        -- right
        (coalesce(min(other.c) filter (where other.r = cur.r and other.h >= cur.h and other.c > cur.c), cur.total_c) - cur.c)
        -- left
        * (cur.c - coalesce(max(other.c) filter (where other.r = cur.r and other.h >= cur.h and other.c < cur.c), 1))
        -- down
        * (coalesce(min(other.r) filter (where other.c = cur.c and other.h >= cur.h and other.r > cur.r), cur.total_r) - cur.r)
        -- up
        * (cur.r - coalesce(max(other.r) filter (where other.c = cur.c and other.h >= cur.h and other.r < cur.r), 1)) as scenic_score
    from grid as cur
    inner join grid as other
        on (cur.r = other.r and cur.c <> other.c)
        or (cur.c = other.c and cur.r <> other.r)
    group by
        cur.r,
        cur.c,
        cur.h,
        cur.total_c,
        cur.total_r
)
select max(scenic_score) from scored;
newkid版本3,就是把tr2.h>=tr.h条件从case when放到了where中。
sql
-- newkid version 3: same idea as version 1, but the "other tree is at least
-- as tall" test is applied once as a join filter instead of being repeated
-- inside every CASE expression.
with t(n, t) as (
    -- n is the hard-coded side length of the square grid in the file.
    -- Alternative input named in the original: '2208-input.txt'.
    select 300, content from read_text('90000.txt.csv')
),
b as (
    -- One record per digit; r and c are the 1-based row and column derived
    -- from the running number rn (assigned in unnest arrival order).
    select
        row_number() over () as rn,
        n as total_r,
        n as total_c,
        (rn - 1) // n + 1 as r,
        (rn - 1) % n + 1 as c,
        b::int as h
    from (
        select n, unnest(string_split(replace(t, chr(10), ''), '')) as b
        from t
    )
),
a as (
    -- The hint below is kept from the original, which was also run on Oracle;
    -- NOTE(review): engines without hint support read it as a plain comment.
    select /*+ USE_CONCAT */
        tr.r,
        tr.c,
        tr.h,
        -- A blocker with tr2.c <> tr.c can only come from the same row, and
        -- one with tr2.r <> tr.r only from the same column, so the CASE tests
        -- need no extra row/column check.
        (coalesce(min(case when tr2.c > tr.c then tr2.c end), tr.total_c) - tr.c)
        * (tr.c - coalesce(max(case when tr2.c < tr.c then tr2.c end), 1))
        * (coalesce(min(case when tr2.r > tr.r then tr2.r end), tr.total_r) - tr.r)
        * (tr.r - coalesce(max(case when tr2.r < tr.r then tr2.r end), 1)) as scenic_score
    from b as tr
    -- LEFT JOIN with the height test in ON (not WHERE): a tree that has no
    -- equal-or-taller tree anywhere in its row and column must still produce
    -- a group, otherwise the tree that sees all the way to every edge would
    -- be dropped from the result. Its NULL blockers fall back to the edges
    -- through the coalesce defaults above.
    left join b as tr2
        on (
            tr.r = tr2.r and tr.c <> tr2.c
            or tr.c = tr2.c and tr.r <> tr2.r
        )
        and tr2.h >= tr.h
    group by
        tr.r,
        tr.c,
        tr.h,
        tr.total_c,
        tr.total_r
)
select max(scenic_score) from a;
- 我的SQL
sql
-- Scalar-subquery variant: for each tree, four correlated subqueries scan the
-- cells to its right, left, below and above and count how many are visible.
with src(n, txt) as (
    -- n is the hard-coded side length of the square grid in the file.
    select 300, content from read_text('90000.txt.csv')
),
grid as (
    -- One record per digit; r and c are 0-based here. b stays the split
    -- character (no numeric cast), so the comparisons below are textual.
    select
        row_number() over () as rn,
        (rn - 1) // n as r,
        (rn - 1) % n as c,
        b
    from (
        select n, unnest(string_split(replace(txt, chr(10), ''), '')) as b
        from src
    )
),
four_count as (
    -- In every subquery maxb is the running maximum height seen so far when
    -- moving away from the tree. Cells whose running maximum is still below
    -- the tree are visible lower trees; sign(...) adds 1 when at least one
    -- blocking cell exists in that direction.
    select
        tree.rn,
        tree.r,
        tree.c,
        tree.b,
        (
            with ray as (
                select
                    grid.*,
                    max(grid.b) over (partition by grid.r order by grid.rn) as maxb
                from grid
                where grid.r = tree.r
                  and grid.c > tree.c
            )
            select
                count(case when ray.maxb < tree.b then 1 end)
                + sign(count(case when ray.maxb >= tree.b then 1 end))
            from ray
        ) as cnt_right,
        (
            with ray as (
                select
                    grid.*,
                    max(grid.b) over (partition by grid.r order by grid.rn desc) as maxb
                from grid
                where grid.r = tree.r
                  and grid.c < tree.c
            )
            select
                count(case when ray.maxb < tree.b then 1 end)
                + sign(count(case when ray.maxb >= tree.b then 1 end))
            from ray
        ) as cnt_left,
        (
            with ray as (
                select
                    grid.*,
                    max(grid.b) over (partition by grid.c order by grid.rn) as maxb
                from grid
                where grid.c = tree.c
                  and grid.r > tree.r
            )
            select
                count(case when ray.maxb < tree.b then 1 end)
                + sign(count(case when ray.maxb >= tree.b then 1 end))
            from ray
        ) as cnt_down,
        (
            with ray as (
                select
                    grid.*,
                    max(grid.b) over (partition by grid.c order by grid.rn desc) as maxb
                from grid
                where grid.c = tree.c
                  and grid.r < tree.r
            )
            select
                count(case when ray.maxb < tree.b then 1 end)
                + sign(count(case when ray.maxb >= tree.b then 1 end))
            from ray
        ) as cnt_up
    from grid as tree
    -- Trees in the first row or first column are skipped.
    where tree.c > 0
      and tree.r > 0
)
select max(cnt_right * cnt_left * cnt_down * cnt_up) as maxpcnt
from four_count;
测试场景:
- 原题带的正式输入文件99*99
sql
--张泽鹏
memory D .read 2208trees.txt
┌────────┬────────┬──────────┐
│ row_id │ col_id │ score │
│ int64 │ int64 │ double │
├────────┼────────┼──────────┤
│ 53 │ 15 │ 301392.0 │
└────────┴────────┴──────────┘
Run Time (s): real 0.142 user 0.380000 sys 0.144000
--newkid
memory D .read 2208p2nkdk3b.sql
┌───────────────────┐
│ max(scenic_score) │
│ int64 │
├───────────────────┤
│ 301392 │
└───────────────────┘
Run Time (s): real 0.281 user 1.424000 sys 0.008000
memory D .read 2208p2nkdkb.sql
┌───────────────────┐
│ max(scenic_score) │
│ int64 │
├───────────────────┤
│ 301392 │
└───────────────────┘
Run Time (s): real 0.240 user 1.296000 sys 0.004000
--在Oracle两者的效率是倒过来的,版本3更快
SQL> @2208p2nk3
R C H SCENIC_SCORE RNK
---------- ---------- ---------- ------------ ----------
53 15 8 301392 1
Elapsed: 00:00:00.59
SQL> @2208p2nk
R C H SCENIC_SCORE RNK
---------- ---------- ---------- ------------ ----------
53 15 8 301392 1
Elapsed: 00:00:01.03
--我的
memory D .read 2208dk2b.sql
┌─────────┐
│ maxpcnt │
│ int64 │
├─────────┤
│ 301392 │
└─────────┘
Run Time (s): real 0.658 user 2.656000 sys 0.328000
- 用脚本生成的随机300*300
sql
-- Write 90000 random digits as one unbroken line to '90000.txt.csv'.
-- floor() is applied before the integer cast: the cast alone rounds, so
-- values of 9.5 and above would become the two-character string '10', and
-- the file would no longer hold exactly one character per cell.
copy (
    select listagg(floor(random() * 10)::int::text, '') as t
    from range(90000) as t(a)
) to '90000.txt.csv' (header 0);
--张泽鹏
memory D .read 2208treesf.txt
┌────────┬────────┬───────────┐
│ row_id │ col_id │ score │
│ int64 │ int64 │ double │
├────────┼────────┼───────────┤
│ 202 │ 191 │ 1043955.0 │
└────────┴────────┴───────────┘
Run Time (s): real 0.676 user 1.680000 sys 0.452000
--newkid
memory D .read 2208p2nkdkbf.sql
┌───────────────────┐
│ max(scenic_score) │
│ int64 │
├───────────────────┤
│ 1043955 │
│ (1.04 million) │
└───────────────────┘
Run Time (s): real 9.389 user 69.076000 sys 0.032000
memory D .read 2208p2nkdk3bf.sql
┌───────────────────┐
│ max(scenic_score) │
│ int64 │
├───────────────────┤
│ 1043955 │
│ (1.04 million) │
└───────────────────┘
Run Time (s): real 12.808 user 96.640000 sys 0.052000
--我的
memory D .read 2208dk2bf.sql
┌────────────────┐
│ maxpcnt │
│ int64 │
├────────────────┤
│ 1043955 │
│ (1.04 million) │
└────────────────┘
Run Time (s): real 16.333 user 84.548000 sys 5.368000
- 用脚本生成的随机1000*1000
sql
--张泽鹏
-- Write a random digit grid to '1000x1000.txt': range(1000) supplies one
-- output record per grid row, and each record is a list of 1000 random
-- digits joined into a single string. No header line is written.
copy (
    select
        array_to_string(
            [cast(cast(floor(random() * 10) as int) as text) for _ in range(1000)],
            ''
        )
    from range(1000)
)
to '1000x1000.txt'
with (header false);
Run Time (s): real 0.146 user 0.136000 sys 0.008000
memory D .read generate-subscripts.sql
┌────────┬────────┬───────────┐
│ row_id │ col_id │ score │
│ int64 │ int64 │ double │
├────────┼────────┼───────────┤
│ 808 │ 330 │ 4222400.0 │
└────────┴────────┴───────────┘
Run Time (s): real 8.262 user 22.448000 sys 1.032000
--换机器重新测
--把张泽鹏原版改为用row_number
memory D .read 2208treesf1k.txt
┌────────┬────────┬───────────┐
│ row_id │ col_id │ score │
│ int64 │ int64 │ double │
├────────┼────────┼───────────┤
│ 728 │ 583 │ 2838240.0 │
└────────┴────────┴───────────┘
Run Time (s): real 4.325 user 9.750000 sys 0.734375
memory D .read generate-subscripts.sql
┌────────┬────────┬───────────┐
│ row_id │ col_id │ score │
│ int64 │ int64 │ double │
├────────┼────────┼───────────┤
│ 728 │ 583 │ 2838240.0 │
└────────┴────────┴───────────┘
Run Time (s): real 4.772 user 11.312500 sys 0.921875
我的写法是在全表扫描时对每棵树调用标量子查询;newkid用自连接(近似笛卡尔积)找出每个方向上最近的、不低于当前树的那棵树的位置;张泽鹏先构造4个方向,再用递归CTE逐层计算,遇到挡住视线的高树时停止迭代。
用时表明,张泽鹏的效率最高,我的最低,newkid的排中间。用row_number处理行号的方式比generate_subscripts高效,但是据张泽鹏先生说,row_number得到的行号有时会错乱。