- 用自然排序函数 naturalSortKey 对版本号排序
sql
aaa@kylin-pc:~/ch$ ./clickhouse
ClickHouse local version 26.3.1.731 (official build).
:) SELECT introduced_in, count()
FROM system.functions
WHERE categories LIKE '%Geo%'
GROUP BY ALL
ORDER BY introduced_in;
SELECT
introduced_in,
count()
FROM system.functions
WHERE categories LIKE '%Geo%'
GROUP BY ALL
ORDER BY introduced_in ASC
Query id: 01e61f15-2267-4091-9c76-33093e1f35d4
┌─introduced_in─┬─count()─┐
1. │ 1.1.0 │ 5 │
2. │ 20.1.0 │ 10 │
3. │ 20.3.0 │ 6 │
4. │ 20.4.0 │ 1 │
5. │ 21.11.0 │ 4 │
6. │ 21.4.0 │ 24 │
7. │ 21.9.0 │ 11 │
8. │ 22.1.0 │ 3 │
9. │ 22.2.0 │ 5 │
10. │ 22.6.0 │ 15 │
11. │ 25.10.0 │ 4 │
12. │ 25.11.0 │ 6 │
13. │ 25.12.0 │ 1 │
14. │ 25.6.0 │ 2 │
15. │ 25.7.0 │ 2 │
└───────────────┴─────────┘
15 rows in set. Elapsed: 0.026 sec.
:) SELECT introduced_in, count()
FROM system.functions
WHERE categories LIKE '%Geo%'
GROUP BY ALL
ORDER BY naturalSortKey(introduced_in);
SELECT
introduced_in,
count()
FROM system.functions
WHERE categories LIKE '%Geo%'
GROUP BY ALL
ORDER BY naturalSortKey(introduced_in) ASC
Query id: eca88812-9c3a-48f6-9f2a-6405679f1e45
┌─introduced_in─┬─count()─┐
1. │ 1.1.0 │ 5 │
2. │ 20.1.0 │ 10 │
3. │ 20.3.0 │ 6 │
4. │ 20.4.0 │ 1 │
5. │ 21.4.0 │ 24 │
6. │ 21.9.0 │ 11 │
7. │ 21.11.0 │ 4 │
8. │ 22.1.0 │ 3 │
9. │ 22.2.0 │ 5 │
10. │ 22.6.0 │ 15 │
11. │ 25.6.0 │ 2 │
12. │ 25.7.0 │ 2 │
13. │ 25.10.0 │ 4 │
14. │ 25.11.0 │ 6 │
15. │ 25.12.0 │ 1 │
└───────────────┴─────────┘
15 rows in set. Elapsed: 0.025 sec.
可见，自然排序函数 naturalSortKey 按主版本号、次版本号的数值大小排序，而不是按字符串的字典序排序（例如 21.9.0 排在 21.11.0 之前，25.6.0 排在 25.10.0 之前）。这个函数在处理章节号时也能用。
- CTE的实体化
先下载英国房地产数据源，然后用脚本导入。注意：本地文件要用 file() 表函数读取，远程文件要用 url() 表函数读取。
sql
aaa@kylin-pc:~/par$ curl -LO https://price-paid-data.publicdata.landregistry.gov.uk/pp-2025.csv
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 133M 100 133M 0 0 12.1M 0 0:00:10 0:00:10 --:--:-- 13.7M
aaa@kylin-pc:~/par$
aaa@kylin-pc:~$ cd ch
:) \i /home/aaa/par/ygfdc.txt
CREATE DATABASE uk
Query id: c888c7bb-713c-4c5c-aba7-97ef802d8908
Ok.
0 rows in set. Elapsed: 0.004 sec.
CREATE TABLE uk.uk_price_paid
(
`price` UInt32,
`date` Date,
`postcode1` LowCardinality(String),
`postcode2` LowCardinality(String),
`type` Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
`is_new` UInt8,
`duration` Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
`addr1` String,
`addr2` String,
`street` LowCardinality(String),
`locality` LowCardinality(String),
`town` LowCardinality(String),
`district` LowCardinality(String),
`county` LowCardinality(String)
)
ENGINE = Memory
Query id: 653bcaf0-1be6-4abd-9662-eaa847eb91fe
Ok.
0 rows in set. Elapsed: 0.003 sec.
INSERT INTO uk.uk_price_paid SELECT
toUInt32(price_string) AS price,
parseDateTimeBestEffortUS(time) AS date,
splitByChar(' ', postcode)[1] AS postcode1,
splitByChar(' ', postcode)[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county
FROM file('/home/aaa/par/pp-2025.csv', 'CSV', 'uuid_string String,\n price_string String,\n time String,\n postcode String,\n a String,\n b String,\n c String,\n addr1 String,\n addr2 String,\n street String,\n locality String,\n town String,\n district String,\n county String,\n d String,\n e String')
Query id: 8295ab9b-d7f1-46a2-98c3-c68c4e8e8ac5
Ok.
802761 rows in set. Elapsed: 0.447 sec. Processed 802.76 thousand rows, 139.67 MB (1.79 million rows/s., 312.12 MB/s.)
Peak memory usage: 255.81 MiB.
直接运行带有 MATERIALIZED 关键字的 CTE 查询语句，报语法错误；尝试设置 enable_materialized_cte 参数，又报该设置不存在（UNKNOWN_SETTING）。
sql
:) \i /home/aaa/par/cte1.sql
Syntax error: failed at position 43 (() (line 3, col 5):
WITH county_year_avg AS MATERIALIZED
(
SELECT county, toYear(date) AS year, avg(price) AS avg_price
FROM uk.uk_price_paid
GROUP BY county,year
)
SELECT p.price, p.addr1, p.town,
p.county,
toYear(p.date) AS year,
round(cya.avg_price) AS countyYear,
round(ca.avg_price) AS countyAllTime
FROM uk.uk_price_paid AS p
INNER JOIN county_year_avg AS cya
ON (p.county = cya.county) AND (toYear(p.date) = cya.year)
INNER JOIN
(
SELECT county, avg(avg_price) AS avg_price
FROM county_year_avg
GROUP BY county
) AS ca ON p.county = ca.county
ORDER BY p.price DESC
LIMIT 10;
Expected one of: token, Comma, FROM, SELECT, INSERT INTO
:) SET enable_materialized_cte=1;
SET enable_materialized_cte = 1
Query id: 069387cc-eac2-4c60-97ac-83a5fc6a674e
Elapsed: 0.008 sec.
Received exception:
Code: 115. DB::Exception: Unknown setting 'enable_materialized_cte'. (UNKNOWN_SETTING)
去掉MATERIALIZED关键字,能够正常查询出结果。
sql
:) \i /home/aaa/par/cte1.sql
WITH county_year_avg AS
(
SELECT
county,
toYear(date) AS year,
avg(price) AS avg_price
FROM uk.uk_price_paid
GROUP BY
county,
year
)
SELECT
p.price,
p.addr1,
p.town,
p.county,
toYear(p.date) AS year,
round(cya.avg_price) AS countyYear,
round(ca.avg_price) AS countyAllTime
FROM uk.uk_price_paid AS p
INNER JOIN county_year_avg AS cya ON (p.county = cya.county) AND (toYear(p.date) = cya.year)
INNER JOIN
(
SELECT
county,
avg(avg_price) AS avg_price
FROM county_year_avg
GROUP BY county
) AS ca ON p.county = ca.county
ORDER BY p.price DESC
LIMIT 10
Query id: 9c968fb6-1a2b-4a6e-8457-97c037b0958e
┌─────price─┬─addr1─────────────────────────┬─town───────────────┬─p.county───────────┬─year─┬─countyYear─┬─countyAllTime─┐
1. │ 793020000 │ HOWARD HOTEL, 12 │ LONDON │ GREATER LONDON │ 2025 │ 723952 │ 723952 │
2. │ 260000000 │ W LONDON LEICESTER SQUARE, 10 │ LONDON │ GREATER LONDON │ 2025 │ 723952 │ 723952 │
3. │ 190498163 │ THAMES CITY │ LONDON │ GREATER LONDON │ 2025 │ 723952 │ 723952 │
4. │ 127700000 │ TESCO │ PURFLEET-ON-THAMES │ THURROCK │ 2025 │ 433208 │ 433208 │
5. │ 124519556 │ 15 │ LONDON │ GREATER LONDON │ 2025 │ 723952 │ 723952 │
6. │ 109500000 │ 1 │ LONDON │ GREATER LONDON │ 2025 │ 723952 │ 723952 │
7. │ 81850000 │ HOLLY HOUSE PRIVATE HOSPITAL │ BUCKHURST HILL │ ESSEX │ 2025 │ 437212 │ 437212 │
8. │ 81370000 │ GRAND CENTRAL │ LIVERPOOL │ MERSEYSIDE │ 2025 │ 239735 │ 239735 │
9. │ 76000000 │ LAWRENCE HOUSE, 2A │ NOTTINGHAM │ CITY OF NOTTINGHAM │ 2025 │ 260684 │ 260684 │
10. │ 75960000 │ CHESHIRE HOSPITAL │ WARRINGTON │ WARRINGTON │ 2025 │ 324241 │ 324241 │
└───────────┴───────────────────────────────┴────────────────────┴────────────────────┴──────┴────────────┴───────────────┘
10 rows in set. Elapsed: 0.108 sec. Processed 2.41 million rows, 28.04 MB (22.36 million rows/s., 260.36 MB/s.)
Peak memory usage: 140.14 MiB.
先查看不带 MATERIALIZED 关键字时的执行计划，作为对照：
sql
:) \i /home/aaa/par/cte2.sql
EXPLAIN indexes = 1, pretty = 1, compact = 1
WITH county_year_avg AS
(
SELECT
county,
toYear(date) AS year,
avg(price) AS avg_price
FROM uk.uk_price_paid
GROUP BY
county,
year
)
SELECT
p.price,
p.addr1,
p.town,
p.county,
toYear(p.date) AS year,
round(cya.avg_price) AS countyYear,
round(ca.avg_price) AS countyAllTime
FROM uk.uk_price_paid AS p
INNER JOIN county_year_avg AS cya ON (p.county = cya.county) AND (toYear(p.date) = cya.year)
INNER JOIN
(
SELECT
county,
avg(avg_price) AS avg_price
FROM county_year_avg
GROUP BY county
) AS ca ON p.county = ca.county
ORDER BY p.price DESC
LIMIT 10
Query id: d734653a-d8a6-4359-a9f6-edb2b4c7911c
┌─explain──────────────────────────────────────────────────────────────────────────┐
1. │ Limit (preliminary LIMIT) │
2. │ └──Sorting (Sorting for ORDER BY) │
3. │ └──Join (JOIN FillRightFirst) │
4. │ ├──Join (JOIN FillRightFirst) │
5. │ │ ├──Filter │
6. │ │ │ └──ReadFromMemoryStorage │
7. │ │ └──BuildRuntimeFilter (Build runtime join filter on __table2.year) │
8. │ │ └──BuildRuntimeFilter (Build runtime join filter on __table2.county) │
9. │ │ └──Aggregating │
10. │ │ └──Filter │
11. │ │ └──ReadFromMemoryStorage │
12. │ └──BuildRuntimeFilter (Build runtime join filter on __table4.county) │
13. │ └──Aggregating │
14. │ └──Aggregating │
15. │ └──ReadFromMemoryStorage │
└──────────────────────────────────────────────────────────────────────────────────┘
15 rows in set. Elapsed: 0.007 sec.
考虑到我的 ClickHouse 是 3 月初下载的，版本可能太旧了，于是到官方网站下载新版。
sql
aaa@kylin-pc:~/ch$ mv clickhouse ch263
aaa@kylin-pc:~/ch$ ./clickhouse
bash: ./clickhouse: 没有那个文件或目录
aaa@kylin-pc:~/par$ curl https://clickhouse.com |sh
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 2911 0 2911 0 0 3598 0 --:--:-- --:--:-- --:--:-- 3593
Will download https://builds.clickhouse.com/master/aarch64/clickhouse into clickhouse
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 156M 100 156M 0 0 7213k 0 0:00:22 0:00:22 --:--:-- 3864k
Successfully downloaded the ClickHouse binary, you can run it as:
./clickhouse
You can also install it:
sudo ./clickhouse install
aaa@kylin-pc:~/par$ mv clickhouse ch
aaa@kylin-pc:~/ch$ mv ../par/ch .
新版本显示为 26.4。因为表使用的是 Memory 引擎，数据只保存在内存中，所以换用新程序后需要重新导入数据。
sql
aaa@kylin-pc:~/ch$ ./ch
Decompressing the binary......
ClickHouse local version 26.4.1.622 (official build).
:) SET enable_materialized_cte=1;
SET enable_materialized_cte = 1
Query id: 637523f6-9ea3-45f3-8286-45a80f640152
Ok.
0 rows in set. Elapsed: 0.002 sec.
:) \i /home/aaa/par/ygfdc.txt
CREATE DATABASE uk
Query id: 8b6bc484-72d7-4dca-ab3c-5b3119ac694f
Ok.
0 rows in set. Elapsed: 0.007 sec.
CREATE TABLE uk.uk_price_paid
(
`price` UInt32,
`date` Date,
`postcode1` LowCardinality(String),
`postcode2` LowCardinality(String),
`type` Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
`is_new` UInt8,
`duration` Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
`addr1` String,
`addr2` String,
`street` LowCardinality(String),
`locality` LowCardinality(String),
`town` LowCardinality(String),
`district` LowCardinality(String),
`county` LowCardinality(String)
)
ENGINE = Memory
Query id: 525bc88a-bf4c-4822-9fa2-c04f3adc7df0
Ok.
0 rows in set. Elapsed: 0.003 sec.
INSERT INTO uk.uk_price_paid SELECT
toUInt32(price_string) AS price,
parseDateTimeBestEffortUS(time) AS date,
splitByChar(' ', postcode)[1] AS postcode1,
splitByChar(' ', postcode)[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county
FROM file('/home/aaa/par/pp-2025.csv', 'CSV', 'uuid_string String,\n price_string String,\n time String,\n postcode String,\n a String,\n b String,\n c String,\n addr1 String,\n addr2 String,\n street String,\n locality String,\n town String,\n district String,\n county String,\n d String,\n e String')
Query id: 97333e72-de0c-4f9b-a48f-d276150e22b2
Ok.
802761 rows in set. Elapsed: 0.461 sec. Processed 802.76 thousand rows, 139.67 MB (1.74 million rows/s., 303.08 MB/s.)
Peak memory usage: 250.07 MiB.
:) \i /home/aaa/par/cte3.sql
EXPLAIN indexes = 1, pretty = 1, compact = 1
WITH county_year_avg AS MATERIALIZED
(
SELECT
county,
toYear(date) AS year,
avg(price) AS avg_price
FROM uk.uk_price_paid
GROUP BY
county,
year
)
SELECT
p.price,
p.addr1,
p.town,
p.county,
toYear(p.date) AS year,
round(cya.avg_price) AS countyYear,
round(ca.avg_price) AS countyAllTime
FROM uk.uk_price_paid AS p
INNER JOIN county_year_avg AS cya ON (p.county = cya.county) AND (toYear(p.date) = cya.year)
INNER JOIN
(
SELECT
county,
avg(avg_price) AS avg_price
FROM county_year_avg
GROUP BY county
) AS ca ON p.county = ca.county
ORDER BY p.price DESC
LIMIT 10
Query id: a1ebb579-457f-445a-bae8-838a246c846f
┌─explain─────────────────────────────────────────────────────────────────────────────┐
1. │ Output: price, addr1, town, p.county, year, countyYear, countyAllTime │
2. │ │
3. │ MaterializingCTEs (Materialize CTEs before main query execution) │
4. │ ├──Limit (preliminary LIMIT) │
5. │ │ └──Sorting (Sorting for ORDER BY) │
6. │ │ └──Join (JOIN FillRightFirst) │
7. │ │ ├──Join (JOIN FillRightFirst) │
8. │ │ │ ├──Filter │
9. │ │ │ │ └──ReadFromMemoryStorage │
10. │ │ │ └──BuildRuntimeFilter (Build runtime join filter on __table2.year) │
11. │ │ │ └──BuildRuntimeFilter (Build runtime join filter on __table2.county) │
12. │ │ │ └──Filter │
13. │ │ │ └──ReadFromMemoryStorage │
14. │ │ └──BuildRuntimeFilter (Build runtime join filter on __table5.county) │
15. │ │ └──Aggregating │
16. │ │ └──ReadFromMemoryStorage │
17. │ └──MaterializingCTE (Materializing CTE: county_year_avg) │
18. │ └──Aggregating │
19. │ └──ReadFromMemoryStorage │
└─────────────────────────────────────────────────────────────────────────────────────┘
19 rows in set. Elapsed: 0.008 sec.
:)
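果然，新版本可以设置 enable_materialized_cte 参数了，带 MATERIALIZED 关键字的查询也显示出不同的执行计划：计划中多了 MaterializingCTEs 节点，county_year_avg 的聚合只在 MaterializingCTE 子树中出现一次；而在之前不带 MATERIALIZED 的计划里，两个 JOIN 分支各自都包含了这一步聚合。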