详解GaussDB(DWS)中3个防过载检查项

本文分享自华为云社区《【防过载检查项】》,作者: 譡里个檔。

1. GUC参数检查

目的:针对不同版本建议设定不同的参数值,当前先检查出来,后续diagnosis会给出建议值

sql 复制代码
SELECT
    split_part((substring(version() from '\((.*)\)')), ' ', 2) AS version,
    (EXISTS (SELECT 1 FROM (SELECT count(DISTINCT node_name) AS dn_cnt 
      FROM pgxc_node WHERE node_type = 'D' AND node_host <> 'localhost' GROUP BY node_host) WHERE dn_cnt <> 1)
    ) AS sdn_per_node,
    node_name, name AS guc_name,
    CASE WHEN unit = 'ms' THEN setting::bigint/1000 || ' s'
         WHEN unit = 's' THEN setting || ' s'
         WHEN unit = 'KB' THEN pg_size_pretty(setting*1024)
         WHEN unit = '8KB' THEN pg_size_pretty(setting*1024*8)
         ELSE setting
    END AS setting, unit,
    CASE WHEN name = 'default_distribution_mode' THEN
             CASE WHEN setting <> 'roundrobin' THEN 'roundrobin' END
         WHEN name = 'autovacuum' THEN
             CASE WHEN setting <> 'on' THEN 'on' END
         WHEN name = 'autovacuum_max_workers' THEN
             CASE WHEN setting::int > 6 THEN '6' END
         WHEN name = 'autovacuum_max_workers' THEN
             CASE WHEN setting::int > 3 THEN '3' END
         WHEN name = 'session_timeout' THEN
             CASE WHEN setting::int > 600 OR setting::int = 0 THEN '<= 10min' END
         WHEN name = 'statement_timeout' THEN
             CASE WHEN setting::int > 24* 60 * 1000 OR setting::int = 0 THEN '<= 24h' END
         WHEN name = 'sql_use_spacelimit' THEN
             CASE WHEN setting::int > 300*1024*1024 OR setting IN ('0', '-1') THEN '<= 300GB' END
         WHEN name = 'temp_file_limit' THEN
             CASE WHEN setting::int > 100*1024*1024 OR setting IN ('0', '-1') THEN '<= 100GB' END
         WHEN name = 'udf_memory_limit' THEN
             CASE WHEN setting::int > 1*1024*1024  THEN '<= 1GB' END
         WHEN name = 'query_dop' THEN
             CASE WHEN setting::int = 0 THEN ' -4' END
         WHEN name = 'max_streams_per_query' THEN
             CASE WHEN setting::int = -1 THEN '50' END
         WHEN name = 'max_connections' THEN
             CASE WHEN node_name = 'dn_6001_6002' AND setting::int < 5000 THEN '>=5000' END
         ELSE ''
    END AS diagnosis
FROM pgxc_parallel_query('all', 'SELECT
    pgxc_node_str() AS node_name, name, setting, unit
FROM pg_settings 
WHERE pgxc_node_str() IN (''cn_5001'',''dn_6001_6002'')
AND name in (''max_streams_per_query'',''query_dop''
,''sql_use_spacelimit'',''temp_file_limit'',''default_distribution_mode'',
''autovacuum_mode'',''autovacuum'',''autovacuum_max_workers_hstore'',''autovacuum_max_workers'',
''session_timeout'',''statement_timeout'',''ddl_lock_timeout'',''idle_in_transaction_timeout'',
''max_connections'',''min_pool_size'',''max_pool_size'',
''max_stream_pool'',''max_active_statements'',''max_prepared_transactions'',
''cstore_buffers'',''shared_buffers'', ''max_process_memory'', ''udf_memory_limit'',
''max_process_memory_balanced'', ''bbox_dump_count'', ''enable_bbox_dump'')') 
AS (node_name name, name text, setting text, unit text)
ORDER BY node_name, name;

2. 大表检查

目的:识别大表,建议客户整改,避免磁盘过载

8.1.3版本使用如下SQL

sql 复制代码
SELECT
    CASE WHEN (skewsize > avgsize * 0.10 AND skewsize > 50 * 1024) THEN 'skew table'
        WHEN (reloptions::text LIKE '%orientation=column%' AND reloptions::text LIKE '%compression=no%') THEN 'uncompressed column table'
        WHEN (x.pclocatortype = 'R' AND avgsize > 10 * 1024) THEN 'large replicattion table'
        WHEN (pg_stat_get_dead_tuples(c.oid) >100000 AND pg_stat_get_dead_tuples(c.oid)/(pg_stat_get_dead_tuples(c.oid)+pg_stat_get_live_tuples(c.oid)) > 0.4) THEN 'dirty table'
        ELSE 'normal large table'
    END AS diagnostic,
    t1.schemaname,  -- 表的schema
    t1.tablename,   -- 表名
    a.rolname AS tableowner,
    x.pgroup AS nodegroup,
    CASE x.pclocatortype WHEN 'H' THEN 'Hash' 
        WHEN 'N' THEN 'Round Robin' 
        WHEN 'R' THEN 'Replicate' 
    END AS locatortype,
    CASE WHEN c.parttype = 'p' THEN true ELSE false END AS ispartitioned,
    CASE WHEN reloptions::text LIKE '%orientation=column%' THEN 'column'
         WHEN reloptions::text LIKE '%orientation=row%' THEN 'row'
    END AS orientation,
    t1.dnnum,                         -- 表的node group的DN数
    t1.totalsize AS "totalsize(MB)",  -- 表的size ,单位MB
    t1.avgsize AS "avgsize(MB)",      -- 平均每个DN上数据量,单位MB
    t1.skewsize AS "skewsize(MB)",    -- 不同DN上数据size的最大差值,单位MB
    t1.skewdn,                        -- 数据量最大的DN
    t1.maxratio,                      -- 数据量最大DN的size/平均size
    t1.minratio,                      -- 数据量最小DN的size/平均size
    t1.skewratio                      -- 不同DN上数据size的最大差值/平均size
FROM ( -- 预处理,识别倾斜表
    SELECT
        schemaname,
        tablename,
        skewdn,
        dnnum,
        totalsize,
        avgsize,
        skewsize,
        (maxsize/avgsize)::numeric(20,2) AS maxratio,
        (minsize/avgsize)::numeric(20,2) AS minratio,
        (skewsize/avgsize)::numeric(20,2) AS skewratio
    FROM (
        SELECT
            schemaname,tablename,skewdn,count(1) AS dnnum,sum(dnsize) AS totalsize,
            avg(dnsize) AS avgsize,max(dnsize) AS maxsize,min(dnsize) AS minsize, (max(dnsize) - min(dnsize)) AS skewsize
        FROM ( --对每个表的数据按照DN数据量大小排序,以及获取倾斜的dn
            SELECT     
                schemaname,
                tablename,
                nodename,
                (dnsize/1024/1024)::bigint AS dnsize, -- 单位换算为MB
                first_value(nodename) over (PARTITION BY schemaname, tablename ORDER BY dnsize DESC, nodename) AS skewdn  -- --数据量最大的DN
            FROM ( -- 获取大于10GB的表
                SELECT 
                    schemaname, tablename,(rd).nodename, ((rd).dnsize + 1) AS dnsize
                FROM (
                    SELECT
                        schemaname, tablename, gs_table_distribution(schemaname, tablename) AS rd
                    FROM gs_table_distribution()
                    WHERE schemaname NOT IN ('pg_catalog', 'dbms_om', 'cstore')
                    AND relkind = 'r'
                    GROUP BY schemaname, tablename
                    HAVING sum(dnsize) > 50.0 * 1024 * 1024 * 1024 -- 总大小大于100GB
                )
            )
        )
        GROUP BY schemaname,tablename, skewdn
    )
) t1
INNER JOIN pg_class c ON c.relname = t1.tablename
LEFT JOIN pg_namespace n ON n.nspname = t1.schemaname
LEFT JOIN pg_authid a ON a.oid = c.relowner
LEFT JOIN pgxc_class x ON x.pcrelid = c.oid
WHERE c.reloptions::text NOT LIKE '%internal_mask%'
ORDER BY totalsize DESC, diagnostic, skewsize DESC
;

8.2.1和8.2.0版本使用如下

sql 复制代码
-- 大表诊断
SELECT
    CASE WHEN (skewsize > avgsize * 0.10 AND skewsize > 50 * 1024) THEN 'skew table'
        WHEN (reloptions::text LIKE '%orientation=column%' AND reloptions::text LIKE '%compression=no%') THEN 'uncompressed column table'
        WHEN (x.pclocatortype = 'R' AND avgsize > 10 * 1024) THEN 'large replicattion table'
        WHEN (pg_stat_get_dead_tuples(c.oid) >100000 AND pg_stat_get_dead_tuples(c.oid)/(pg_stat_get_dead_tuples(c.oid)+pg_stat_get_live_tuples(c.oid)) > 0.4) THEN 'dirty table'
        WHEN (reloptions::text LIKE '%orientation=column%') THEN
            CASE WHEN (SELECT total_cu_count > 0 AND (zero_size_cu_count + small_cu_count)/total_cu_count > 0.5 FROM get_col_cu_info(t1.schemaname, t1.tablename)) THEN 'small cu table'
                ELSE 'normal large table'
            END
        ELSE 'normal large table'
    END AS diagnostic,
    t1.schemaname,  -- 表的schema
    t1.tablename,   -- 表名
    a.rolname AS tableowner,
    x.pgroup AS nodegroup,
    CASE x.pclocatortype WHEN 'H' THEN 'Hash' 
        WHEN 'N' THEN 'Round Robin' 
        WHEN 'R' THEN 'Replicate' 
    END AS locatortype,
    CASE WHEN c.parttype = 'p' THEN true ELSE false END AS ispartitioned,
    CASE WHEN reloptions::text LIKE '%orientation=column%' THEN 'column'
         WHEN reloptions::text LIKE '%orientation=row%' THEN 'row'
    END AS orientation,
    t1.dnnum,                         -- 表的node group的DN数
    t1.totalsize AS "totalsize(MB)",  -- 表的size ,单位MB
    t1.avgsize AS "avgsize(MB)",      -- 平均每个DN上数据量,单位MB
    t1.skewsize AS "skewsize(MB)",    -- 不同DN上数据size的最大差值,单位MB
    t1.skewdn,                        -- 数据量最大的DN
    t1.maxratio,                      -- 数据量最大DN的size/平均size
    t1.minratio,                      -- 数据量最小DN的size/平均size
    t1.skewratio                      -- 不同DN上数据size的最大差值/平均size
FROM ( -- 预处理,识别倾斜表
    SELECT
        schemaname,
        tablename,
        skewdn,
        dnnum,
        totalsize,
        avgsize,
        skewsize,
        (maxsize/avgsize)::numeric(20,2) AS maxratio,
        (minsize/avgsize)::numeric(20,2) AS minratio,
        (skewsize/avgsize)::numeric(20,2) AS skewratio
    FROM (
        SELECT
            schemaname,
            tablename,
            skewdn,
            count(1) AS dnnum,
            sum(dnsize) AS totalsize,
            avg(dnsize) AS avgsize,
            max(dnsize) AS maxsize,
            min(dnsize) AS minsize,
            (max(dnsize) - min(dnsize)) AS skewsize
        FROM ( --对每个表的数据按照DN数据量大小排序,以及获取倾斜的dn
            SELECT     
                schemaname,
                tablename,
                nodename,
                (dnsize/1024/1024)::bigint AS dnsize, -- 单位换算为MB
                first_value(nodename) over (PARTITION BY schemaname, tablename ORDER BY dnsize DESC, nodename) AS skewdn  -- --数据量最大的DN
            FROM ( -- 获取大于10GB的表
                SELECT 
                    schemaname, tablename,(rd).nodename, ((rd).dnsize + 1) AS dnsize
                FROM (
                    SELECT
                        schemaname, tablename, gs_table_distribution(schemaname, tablename) AS rd
                    FROM gs_table_distribution()
                    WHERE schemaname NOT IN ('pg_catalog', 'dbms_om', 'cstore')
                    AND relkind = 'r'
                    GROUP BY schemaname, tablename
                    HAVING sum(dnsize) > 50* 1024 * 1024 * 1024.0 -- 总大小大于100GB
                )
            )
        )
        GROUP BY schemaname,tablename, skewdn
    )
) t1
INNER JOIN pg_class c ON c.relname = t1.tablename
LEFT JOIN pg_namespace n ON n.nspname = t1.schemaname
LEFT JOIN pg_authid a ON a.oid = c.relowner
LEFT JOIN pgxc_class x ON x.pcrelid = c.oid
WHERE c.reloptions::text NOT LIKE '%internal_mask%'
ORDER BY totalsize DESC, diagnostic, skewsize DESC
;

8.3.0版本使用

sql 复制代码
-- 大表诊断
SELECT
    CASE WHEN (skewsize > avgsize * 0.10 AND skewsize > 50 * 1024) THEN 'skew table'
        WHEN (reloptions::text LIKE '%orientation=column%' AND reloptions::text LIKE '%compression=no%') THEN 'uncompressed column table'
        WHEN (x.pclocatortype = 'R' AND avgsize > 10 * 1024) THEN 'large replicattion table'
        WHEN (pg_stat_get_dead_tuples(c.oid) >100000 AND pg_stat_get_dead_tuples(c.oid)/(pg_stat_get_dead_tuples(c.oid)+pg_stat_get_live_tuples(c.oid)) > 0.4) THEN 'dirty table'
        WHEN (reloptions::text LIKE '%orientation=column%') THEN
            CASE WHEN (SELECT total_cu_count > 0 AND (zero_cu_count + small_cu_count)/total_cu_count > 0.5 FROM pgxc_get_small_cu_info(c.oid)) THEN 'small cu table'
                ELSE 'normal large table'
            END
        ELSE 'normal large table'
    END AS diagnostic,
    t1.schemaname,  -- 表的schema
    t1.tablename,   -- 表名
    a.rolname AS tableowner,
    x.pgroup AS nodegroup,
    CASE x.pclocatortype WHEN 'H' THEN 'Hash' 
        WHEN 'N' THEN 'Round Robin' 
        WHEN 'R' THEN 'Replicate' 
    END AS locatortype,
    CASE WHEN c.parttype = 'p' THEN true ELSE false END AS ispartitioned,
    CASE WHEN reloptions::text LIKE '%orientation=column%' THEN 'column'
         WHEN reloptions::text LIKE '%orientation=row%' THEN 'row'
    END AS orientation,
    t1.dnnum,                         -- 表的node group的DN数
    t1.totalsize AS "totalsize(MB)",  -- 表的size ,单位MB
    t1.avgsize AS "avgsize(MB)",      -- 平均每个DN上数据量,单位MB
    t1.skewsize AS "skewsize(MB)",    -- 不同DN上数据size的最大差值,单位MB
    t1.skewdn,                        -- 数据量最大的DN
    t1.maxratio,                      -- 数据量最大DN的size/平均size
    t1.minratio,                      -- 数据量最小DN的size/平均size
    t1.skewratio                      -- 不同DN上数据size的最大差值/平均size
FROM ( -- 预处理,识别倾斜表
    SELECT
        schemaname,
        tablename,
        skewdn,
        dnnum,
        totalsize,
        avgsize,
        skewsize,
        (maxsize/avgsize)::numeric(20,2) AS maxratio,
        (minsize/avgsize)::numeric(20,2) AS minratio,
        (skewsize/avgsize)::numeric(20,2) AS skewratio
    FROM (
        SELECT
            schemaname,
            tablename,
            skewdn,
            count(1) AS dnnum,
            sum(dnsize) AS totalsize,
            avg(dnsize) AS avgsize,
            max(dnsize) AS maxsize,
            min(dnsize) AS minsize,
            (max(dnsize) - min(dnsize)) AS skewsize
        FROM ( --对每个表的数据按照DN数据量大小排序,以及获取倾斜的dn
            SELECT     
                schemaname,
                tablename,
                nodename,
                (dnsize/1024/1024)::bigint AS dnsize, -- 单位换算为MB
                first_value(nodename) over (PARTITION BY schemaname, tablename ORDER BY dnsize DESC, nodename) AS skewdn  -- --数据量最大的DN
            FROM ( -- 获取大于10GB的表
                SELECT 
                    schemaname, tablename,(rd).nodename, ((rd).dnsize + 1) AS dnsize
                FROM (
                    SELECT
                        schemaname, tablename, gs_table_distribution(schemaname, tablename) AS rd
                    FROM gs_table_distribution()
                    WHERE schemaname NOT IN ('pg_catalog', 'dbms_om', 'cstore')
                    AND relkind = 'r'
                    GROUP BY schemaname, tablename
                    HAVING sum(dnsize) > 50.0 * 1024 * 1024 * 1024 -- 总大小大于100GB
                )
            )
        )
        GROUP BY schemaname,tablename, skewdn
    )
) t1
INNER JOIN pg_class c ON c.relname = t1.tablename
LEFT JOIN pg_namespace n ON n.nspname = t1.schemaname
LEFT JOIN pg_authid a ON a.oid = c.relowner
LEFT JOIN pgxc_class x ON x.pcrelid = c.oid
WHERE c.reloptions::text NOT LIKE '%internal_mask%'
ORDER BY totalsize DESC, diagnostic, skewsize DESC
;

针对不同的诊断结果使用如下诊断措施

3. 冗余索引诊断

目的:识别冗余索引,建议客户删除。可以降低磁盘空间,并降低大规模数据导入的时候的xlog规模

sql 复制代码
-- optimizable policy为duplicate的检查项
--        表明两个索引字段和字段顺序完全一致
--         建议直接删除optimizable index指定的索引;
-- optimizable policy为redundancy检查项表明
--         optimizable index指定的索引的索引列刚好是base index的索引列的前面字段
--         建议直接删除optimizable index指定的索引;
-- optimizable policy为optimizable检查项
--         表明optimizable index和base index这两个索引的索引列完全重复,但是索引列的顺序不一致
--         这种场景需要人工介入分析是否可以优化
WITH info AS(
    SELECT 
        quote_ident(n.nspname) || '.' || quote_ident(c.relname) AS tablename,
        pgroup AS nodegroup,
        x.indrelid AS indrelid,
        x.indexrelid AS indexrelid,
        indisunique,
        indisprimary,
        indnatts,
        indkey,
        indexprs
    FROM pg_index x
    INNER JOIN pg_class c ON c.oid = x.indrelid
    INNER JOIN pg_class i ON i.oid = x.indexrelid
    LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
    INNER JOIN pgxc_class xc ON xc.pcrelid = c.oid
    WHERE c.relkind = 'r' AND c.oid >= 16384 AND (c.reloptions IS NULL OR c.reloptions::text NOT LIKE '%internal_mask%')
    AND i.relkind = 'i' AND i.oid >= 16384
    AND x.indpred IS NULL
),

base AS(
    SELECT
        tablename,
        nodegroup,
        i.indrelid,
        i.indexrelid baseidx,
        i.indisunique AS base_unique,
        i.indisprimary AS base_primary,
        x.indexrelid AS optidx,
        x.indisunique AS opt_unique,
        x.indisprimary AS opt_primary,
        CASE WHEN opt_primary > base_primary OR opt_unique > base_unique THEN true ELSE false END AS swap,
        CASE WHEN i.indkey = x.indkey AND coalesce(pg_get_expr(i.indexprs, i.indrelid), 'NULL') = coalesce(pg_get_expr(x.indexprs, x.indrelid), 'NULL') THEN 'duplicate'::text
            WHEN x.indexprs IS NOT NULL OR i.indexprs IS NOT NULL THEN NULL::text
            WHEN strpos(i.indkey::text, x.indkey::text||' ') = 1 OR strpos(x.indkey::text, i.indkey::text||' ') = 1 THEN 'redundancy'::text
            WHEN i.indkey @> x.indkey AND x.indkey @> i.indkey THEN 'optimizable'::text
            ELSE NULL
        END AS optpolicy
    FROM info i
    INNER JOIN pg_index x ON (i.indrelid = x.indrelid AND i.indexrelid > x.indexrelid)
    WHERE x.indpred IS NULL AND optpolicy IS NOT NULL
),

tmp AS(
    SELECT
        tablename,
        indrelid,
        nodegroup,
        CASE WHEN swap THEN optidx       ELSE baseidx      END AS base_idx,
        CASE WHEN swap THEN opt_primary  ELSE base_primary END AS base_primary,
        CASE WHEN swap THEN opt_unique   ELSE base_unique  END AS base_unique,
        CASE WHEN swap THEN baseidx      ELSE optidx       END AS opt_idx,
        CASE WHEN swap THEN base_primary ELSE opt_primary  END AS opt_primary,
        CASE WHEN swap THEN base_unique  ELSE opt_unique   END AS opt_unique,
        optpolicy
    FROM base
)

SELECT
    tablename,
    nodegroup,
    base_idx::regclass::text AS base_index,
    base_primary,
    base_unique,
    substring(pg_get_indexdef(base_idx) from 'USING .+\)') AS base_idxdef,
    opt_idx::regclass::text AS opt_index,
    opt_primary,
    opt_unique,
    substring(pg_get_indexdef(opt_idx) from 'USING .+\)') AS opt_idxdef,
    optpolicy,
    pg_get_tabledef(indrelid)
FROM tmp
ORDER BY 1, 2, 3
;

点击关注,第一时间了解华为云新鲜技术~

相关推荐
Hello.Reader1 小时前
Redis 延迟监控深度指南
数据库·redis·缓存
ybq195133454311 小时前
Redis-主从复制-分布式系统
java·数据库·redis
好奇的菜鸟4 小时前
如何在IntelliJ IDEA中设置数据库连接全局共享
java·数据库·intellij-idea
tan180°4 小时前
MySQL表的操作(3)
linux·数据库·c++·vscode·后端·mysql
满昕欢喜4 小时前
SQL Server从入门到项目实践(超值版)读书笔记 20
数据库·sql·sqlserver
优创学社25 小时前
基于springboot的社区生鲜团购系统
java·spring boot·后端
why技术5 小时前
Stack Overflow,轰然倒下!
前端·人工智能·后端
幽络源小助理5 小时前
SpringBoot基于Mysql的商业辅助决策系统设计与实现
java·vue.js·spring boot·后端·mysql·spring
Hello.Reader6 小时前
Redis 延迟排查与优化全攻略
数据库·redis·缓存
ai小鬼头6 小时前
AIStarter如何助力用户与创作者?Stable Diffusion一键管理教程!
后端·架构·github