三种用SQL解决Advent of Code 2022第8题树顶木屋的比较和分析

1.张泽鹏先生的SQL，处理带换行符的输入文件

sql 复制代码

-- Advent of Code 2022 day 8, part 2: report the cell with the highest scenic score.
-- Input: '1000x1000.txt', one grid row per line, one character per tree height.
-- Approach: from every interior cell start one walk per direction and extend all
-- walks one step per recursive iteration until each walk is blocked or hits the border.
with recursive heights as (
  -- Turn the file content into a list of rows, each row being a list of single characters.
  select
    [
      string_split(line, '')
      for line in string_split(trim(content, E'\n'), E'\n')
    ] as matrix
  from
    read_text('1000x1000.txt')
), lines as (
  -- One record per grid row, numbered by its 1-based position in the list.
  select
    generate_subscripts(matrix, 1) as row_id,
    unnest(matrix) as line,
  from
    heights
), cells as materialized (
  -- One record per cell with 1-based (row_id, col_id).
  -- bound is the largest row index; it is used below as the limit for rows AND columns,
  -- so the grid is treated as square.
  -- NOTE(review): height stays a string here, so the `>` comparison in `scenes` is a
  -- string comparison; that orders correctly only while every height is one character.
  select
    max(row_id) over () as bound,
    row_id,
    generate_subscripts(line, 1) as col_id,
    unnest(line) as height
  from
    lines
), matrix as (
  -- Four walk seeds per cell (one per direction), each starting with score 0.
  -- has_more is true only for cells that are not on the outer border.
  select
    cells.*,
    directory,
    0 as score,
    1 < least(row_id, col_id) and greatest(row_id, col_id) < bound as has_more,
    row_id as next_row,
    col_id as next_col,
  from
    cells,
    range(4) as directories(directory) -- four directions: up, down, left, right
), scenes as (
  -- Anchor: seeds of interior cells; next_row/next_col is moved to the first neighbour
  -- in the walk direction.
  select
    bound,
    row_id,
    col_id,
    height,
    directory,
    score,
    has_more,
    case directory
      when 0 then next_row - 1 -- up
      when 1 then next_row + 1 -- down
      else next_row
    end as next_row,
    case directory
      when 2 then next_col - 1 -- left
      when 3 then next_col + 1 -- right
      else next_col
    end as next_col,
  from
    matrix
  where
    has_more

  union all

  -- Recursive step: look at the cell at (next_row, next_col), count it (score + 1),
  -- and keep walking only if the starting tree is taller than that cell and that cell
  -- is not on the border. Then advance next_row/next_col one more step.
  select
    scenes.bound,
    scenes.row_id,
    scenes.col_id,
    scenes.height,
    scenes.directory,
    scenes.score + 1 as score,
    scenes.has_more and scenes.height > cells.height and 1 < least(scenes.next_row, scenes.next_col) and greatest(scenes.next_row, scenes.next_col) < scenes.bound as has_more,
    case scenes.directory
      when 0 then scenes.next_row - 1 -- up
      when 1 then scenes.next_row + 1 -- down
      else scenes.next_row
    end as next_row,
    case scenes.directory
      when 2 then scenes.next_col - 1 -- left
      when 3 then scenes.next_col + 1 -- right
      else scenes.next_col
    end as next_col,
  from
    scenes
  inner join
    cells
  on
    scenes.next_row = cells.row_id
    and scenes.next_col = cells.col_id
    -- only walks that are still open are extended; finished walks produce no new rows
    and scenes.has_more
), scores as (
  -- Rows with has_more = false are the final state of a walk; multiply the viewing
  -- distances of the walks that belong to the same starting cell.
  select
    row_id,
    col_id,
    product(score) as score
  from
    scenes
  where
    not has_more
  group by
    row_id,
    col_id
)
-- Best cell; ties are broken by the smallest row_id, then col_id.
select * from scores order by score desc, row_id, col_id limit 1;

如果要处理不带换行符的文件，把 matrix 之前的几个 CTE 改成如下写法，用 row_number 代替 generate_subscripts：

sql 复制代码

-- Replacement for the CTEs in front of `matrix` when the input has no line breaks.
-- n is the side length of the square grid (300 for '90000.txt.csv'; the 99*99 puzzle
-- input '2208-input.txt' would need n = 99).
with recursive t(n, t) as (
  select
    300,
    content
  from
    read_text('90000.txt.csv')
), b as (
  -- One record per character. rn numbers the characters from 1; r and c are the
  -- 1-based row and column derived from rn and the side length n.
  -- NOTE(review): row_number() over () has no ORDER BY, so it relies on unnest
  -- emitting the characters in list order.
  select
    row_number() over () as rn,
    n as total_r,
    n as total_c,
    (rn - 1) // n + 1 as r,
    (rn - 1) % n + 1 as c,
    b::int as h
  from (
    select
      n,
      unnest(string_split(replace(t, chr(10), ''), '')) as b
    from
      t
  )
), cells as materialized (
  select
    -- bound has to be the largest row index, because the later CTEs compare it with
    -- row_id and col_id to detect the border. max(rn) would be the total number of
    -- cells, so the border test would never fire and walks that reach the bottom or
    -- right edge would drop out of the recursion without contributing their distance.
    max(r) over () as bound,
    r as row_id,
    c as col_id,
    h as height
  from
    b
),

newkid版本1

sql 复制代码

-- newkid version 1: pair every tree with the other trees of its row and of its column,
-- then aggregate per tree to find the nearest tree of at least the same height in each
-- direction (falling back to the grid edge) and multiply the four distances.
with t(n, t) as (
  select
    300,
    content
  from
    read_text('90000.txt.csv')
),
b as (
  -- One record per character: rn counts from 1, r/c are 1-based coordinates, h is the height.
  select
    row_number() over () as rn,
    n as total_r,
    n as total_c,
    (rn - 1) // n + 1 as r,
    (rn - 1) % n + 1 as c,
    b::int as h
  from (
    select
      n,
      unnest(string_split(replace(t, chr(10), ''), '')) as b
    from
      t
  )
),
a as (
  select
    cur.r,
    cur.c,
    cur.h,
    -- right * left * down * up viewing distance
    (coalesce(min(oth.c) filter (where oth.r = cur.r and oth.h >= cur.h and oth.c > cur.c), cur.total_c) - cur.c)
    * (cur.c - coalesce(max(oth.c) filter (where oth.r = cur.r and oth.h >= cur.h and oth.c < cur.c), 1))
    * (coalesce(min(oth.r) filter (where oth.c = cur.c and oth.h >= cur.h and oth.r > cur.r), cur.total_r) - cur.r)
    * (cur.r - coalesce(max(oth.r) filter (where oth.c = cur.c and oth.h >= cur.h and oth.r < cur.r), 1)) as scenic_score
  from
    b as cur
  inner join
    b as oth
  on
    (cur.r = oth.r and cur.c <> oth.c)
    or (cur.c = oth.c and cur.r <> oth.r)
  group by
    cur.r,
    cur.c,
    cur.h,
    cur.total_c,
    cur.total_r
)
select max(scenic_score) from a;

newkid版本3，就是把tr2.h>=tr.h条件从case when放到了where中。

sql 复制代码

-- newkid version 3: like version 1, but the height test (tr2.h >= tr.h) is applied
-- while pairing the trees instead of inside every aggregate.
-- The test sits in the ON clause of a LEFT JOIN: as a plain WHERE filter it removes
-- every pair of a tree that has no equal-or-taller tree in its row and column, so that
-- tree (which sees to the edge in all four directions) would vanish from the result.
with t(n, t) as (
  select
    300,
    content
  from
    read_text('90000.txt.csv')
),
b as (
  -- One record per character: rn counts from 1, r/c are 1-based coordinates, h is the height.
  select
    row_number() over () as rn,
    n as total_r,
    n as total_c,
    (rn - 1) // n + 1 as r,
    (rn - 1) % n + 1 as c,
    b::int as h
  from (
    select
      n,
      unnest(string_split(replace(t, chr(10), ''), '')) as b
    from
      t
  )
),
a as (
  select /*+ USE_CONCAT */
    tr.r,
    tr.c,
    tr.h,
    -- right * left * down * up viewing distance; a tree without any blocking partner
    -- has only NULLs on the tr2 side, so every coalesce falls back to the grid edge
    (coalesce(min(case when tr2.c > tr.c then tr2.c end), tr.total_c) - tr.c)
    * (tr.c - coalesce(max(case when tr2.c < tr.c then tr2.c end), 1))
    * (coalesce(min(case when tr2.r > tr.r then tr2.r end), tr.total_r) - tr.r)
    * (tr.r - coalesce(max(case when tr2.r < tr.r then tr2.r end), 1)) as scenic_score
  from
    b as tr
  left join
    b as tr2
  on
    (
      (tr.r = tr2.r and tr.c <> tr2.c)
      or (tr.c = tr2.c and tr.r <> tr2.r)
    )
    and tr2.h >= tr.h
  group by
    tr.r,
    tr.c,
    tr.h,
    tr.total_c,
    tr.total_r
)
select max(scenic_score) from a;

我的SQL

sql 复制代码

-- Scalar-subquery version: for every tree that is not on the top or left edge, four
-- correlated subqueries count the trees seen to the right, left, down and up.
-- Each subquery builds a running maximum of the heights while moving away from the
-- tree: cells whose running maximum is still below the tree are counted one by one,
-- and one more is added if any cell reaches the tree's height (the blocking tree).
with t(n, t) as (
  select
    300 as n,
    content
  from
    read_text('90000.txt.csv')
),
c as (
  -- One record per character: rn counts from 1, r/c are 0-based coordinates,
  -- b is the height kept as a one-character string.
  select
    row_number() over () as rn,
    (rn - 1) // n as r,
    (rn - 1) % n as c,
    b
  from (
    select
      n,
      unnest(string_split(replace(t, chr(10), ''), '')) as b
    from
      t
  )
),
four_count as (
  select
    here.*,
    (
      with ahead as (
        select max(v.b) over (partition by v.r order by v.rn) as maxb
        from c as v
        where v.r = here.r and v.c > here.c
      )
      select
        count(*) filter (where ahead.maxb < here.b)
        + sign(count(*) filter (where ahead.maxb >= here.b))
      from ahead
    ) as cnt_right,
    (
      with ahead as (
        select max(v.b) over (partition by v.r order by v.rn desc) as maxb
        from c as v
        where v.r = here.r and v.c < here.c
      )
      select
        count(*) filter (where ahead.maxb < here.b)
        + sign(count(*) filter (where ahead.maxb >= here.b))
      from ahead
    ) as cnt_left,
    (
      with ahead as (
        select max(v.b) over (partition by v.c order by v.rn) as maxb
        from c as v
        where v.c = here.c and v.r > here.r
      )
      select
        count(*) filter (where ahead.maxb < here.b)
        + sign(count(*) filter (where ahead.maxb >= here.b))
      from ahead
    ) as cnt_down,
    (
      with ahead as (
        select max(v.b) over (partition by v.c order by v.rn desc) as maxb
        from c as v
        where v.c = here.c and v.r < here.r
      )
      select
        count(*) filter (where ahead.maxb < here.b)
        + sign(count(*) filter (where ahead.maxb >= here.b))
      from ahead
    ) as cnt_up
  from
    c as here
  where
    here.c > 0
    and here.r > 0
)
select max(cnt_right * cnt_left * cnt_down * cnt_up) as maxpcnt from four_count;

测试场景：

1.原题带的正式输入文件99*99

sql 复制代码

--张泽鹏
memory D .read 2208trees.txt
┌────────┬────────┬──────────┐
│ row_id │ col_id │  score   │
│ int64  │ int64  │  double  │
├────────┼────────┼──────────┤
│     53 │     15 │ 301392.0 │
└────────┴────────┴──────────┘
Run Time (s): real 0.142 user 0.380000 sys 0.144000
--newkid
memory D .read 2208p2nkdk3b.sql
┌───────────────────┐
│ max(scenic_score) │
│       int64       │
├───────────────────┤
│            301392 │
└───────────────────┘
Run Time (s): real 0.281 user 1.424000 sys 0.008000
memory D .read 2208p2nkdkb.sql
┌───────────────────┐
│ max(scenic_score) │
│       int64       │
├───────────────────┤
│            301392 │
└───────────────────┘
Run Time (s): real 0.240 user 1.296000 sys 0.004000
--在Oracle两者的效率是倒过来的，版本3更快
SQL> @2208p2nk3

	 R	    C	       H SCENIC_SCORE	     RNK
---------- ---------- ---------- ------------ ----------
	53	   15	       8       301392	       1

Elapsed: 00:00:00.59
SQL> @2208p2nk

	 R	    C	       H SCENIC_SCORE	     RNK
---------- ---------- ---------- ------------ ----------
	53	   15	       8       301392	       1

Elapsed: 00:00:01.03
--我的
memory D .read 2208dk2b.sql
┌─────────┐
│ maxpcnt │
│  int64  │
├─────────┤
│  301392 │
└─────────┘
Run Time (s): real 0.658 user 2.656000 sys 0.328000

2.用脚本生成的随机300*300

sql 复制代码

-- Generate the 300*300 test grid as a single line of 90000 random digits.
-- floor() keeps every value in 0..9. A bare ::int cast rounds, so random()*10 could
-- become 10 (two characters), which shifts all following cells and also makes 0 and 9
-- half as likely as the other digits.
copy (
  select
    listagg(floor(random() * 10)::int::text, '') as t
  from
    range(90000) as t(a)
) to '90000.txt.csv' (header 0);

--张泽鹏
memory D .read 2208treesf.txt
┌────────┬────────┬───────────┐
│ row_id │ col_id │   score   │
│ int64  │ int64  │  double   │
├────────┼────────┼───────────┤
│    202 │    191 │ 1043955.0 │
└────────┴────────┴───────────┘
Run Time (s): real 0.676 user 1.680000 sys 0.452000

--newkid 
memory D .read 2208p2nkdkbf.sql
┌───────────────────┐
│ max(scenic_score) │
│       int64       │
├───────────────────┤
│      1043955      │
│  (1.04 million)   │
└───────────────────┘
Run Time (s): real 9.389 user 69.076000 sys 0.032000

memory D .read 2208p2nkdk3bf.sql
┌───────────────────┐
│ max(scenic_score) │
│       int64       │
├───────────────────┤
│      1043955      │
│  (1.04 million)   │
└───────────────────┘
Run Time (s): real 12.808 user 96.640000 sys 0.052000
--我的
memory D .read 2208dk2bf.sql
┌────────────────┐
│    maxpcnt     │
│     int64      │
├────────────────┤
│    1043955     │
│ (1.04 million) │
└────────────────┘
Run Time (s): real 16.333 user 84.548000 sys 5.368000

3.脚本生成的随机1000*1000

sql 复制代码

--张泽鹏
-- Write the 1000x1000 test grid: 1000 lines, each the concatenation of 1000 random
-- digits (floor(random() * 10) yields 0..9), without a header line.
copy (
  select
    array_to_string(digits, '')
  from (
    select
      [floor(random() * 10)::int::text for _ in range(1000)] as digits
    from
      range(1000)
  )
)
to '1000x1000.txt'
with (header false);
Run Time (s): real 0.146 user 0.136000 sys 0.008000
memory D .read generate-subscripts.sql
┌────────┬────────┬───────────┐
│ row_id │ col_id │   score   │
│ int64  │ int64  │  double   │
├────────┼────────┼───────────┤
│    808 │    330 │ 4222400.0 │
└────────┴────────┴───────────┘
Run Time (s): real 8.262 user 22.448000 sys 1.032000
--换机器重新测
--把张泽鹏原版改为用row_number
memory D .read 2208treesf1k.txt
┌────────┬────────┬───────────┐
│ row_id │ col_id │   score   │
│ int64  │ int64  │  double   │
├────────┼────────┼───────────┤
│  728   │  583   │ 2838240.0 │
└────────┴────────┴───────────┘
Run Time (s): real 4.325 user 9.750000 sys 0.734375
memory D .read generate-subscripts.sql
┌────────┬────────┬───────────┐
│ row_id │ col_id │   score   │
│ int64  │ int64  │  double   │
├────────┼────────┼───────────┤
│  728   │  583   │ 2838240.0 │
└────────┴────────┴───────────┘
Run Time (s): real 4.772 user 11.312500 sys 0.921875

我的写法是在全表扫描时对每棵树调用四个标量子查询；newkid用自连接（笛卡尔积加同行/同列过滤）和min/max聚合，找出每个方向上最近的遮挡树的位置；张泽鹏先为每棵树构造4个方向，再用递归CTE逐层向外计算，遇到挡住视线的高树就停止该方向的迭代。

用时表明，张泽鹏的效率最高，我的最低，newkid的居中。用row_number计算行号的方式比generate_subscripts高效，但据张泽鹏先生说，row_number有时会出现顺序错乱。

三种用SQL解决Advent of Code 2022第8题 树顶木屋 的比较和分析

三种用SQL解决Advent of Code 2022第8题树顶木屋的比较和分析