Monitoring the Impact of the Source PostgreSQL on Flink CDC

When Flink CDC captures changes from PostgreSQL, pressure on the source instance (WAL buffer saturation, frequent requested checkpoints, lock waits, long transactions, connection load) shows up directly as CDC latency. The small standalone program below polls the PostgreSQL statistics views on a fixed interval, scores CDC health with a few heuristics, and writes the results to a Doris log table.

1. pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.sunwoda</groupId>
    <artifactId>pg-test</artifactId>
    <version>2.0-SNAPSHOT</version>

    <properties>
        <java.version>1.8</java.version>
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
        <fastjson.version>2.0.52</fastjson.version>
        <postgresql.version>42.2.12</postgresql.version>
        <mysql.version>8.0.33</mysql.version>
        <logback.version>1.2.11</logback.version>
        <lombok.version>1.18.20</lombok.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>${postgresql.version}</version>
        </dependency>
        <dependency>
            <groupId>com.mysql</groupId>
            <artifactId>mysql-connector-j</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-core</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>${fastjson.version}</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>${lombok.version}</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>com.test.cdc.PgCdcUltimateMonitor</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

2. Main Java class (PgCdcUltimateMonitor)

package com.test.cdc;

import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;

import java.sql.*;
import java.util.*;
import java.util.Date;
@Slf4j
public class PgCdcUltimateMonitor {

    // PostgreSQL settings (multiple databases on one instance)
    private static final String PG_HOST = "";
    private static final int PG_PORT = 5432;
    private static final String PG_USER = "";
    private static final String PG_PASSWORD = "";
    private static final List<String> PG_DATABASES = Arrays.asList(
            "dbName1",
            "dbName2",
            "dbName3"
    );

    // Doris settings
    private static final String DORIS_URL = "jdbc:mysql://ip:9030/ods";
    private static final String DORIS_USER = "";
    private static final String DORIS_PASSWORD = "";

    // Collection interval: 5 minutes
    private static final long INTERVAL = 5 * 60 * 1000L;
    static {
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
        } catch (ClassNotFoundException e) {
           log.error("找不到 MySQL 驱动:{}", e.getMessage());
        }
    }
    public static void main(String[] args) {
        new PgCdcUltimateMonitor().start();
    }

    public void start() {
        log.info("===== PostgreSQL CDC 终极监控程序启动 =====");
        new Timer().scheduleAtFixedRate(new MonitorTask(), 0, INTERVAL);
    }

    static class MonitorTask extends TimerTask {

        @Override
        public void run() {
            // Iterate over all configured databases
            for (String dbName : PG_DATABASES) {
                String pgUrl = String.format("jdbc:postgresql://%s:%d/%s", PG_HOST, PG_PORT, dbName);

                try (Connection pgConn = DriverManager.getConnection(pgUrl, PG_USER, PG_PASSWORD)) {
                    log.info("[{}] Start collecting metrics for database: {}", new Date(), dbName);

                    // --------------------------
                    // 1. Collect PostgreSQL metrics
                    // --------------------------

                    long checkpointsTimed = 0;
                    long checkpointsReq = 0;
                    long walBuffersFull = 0;
                    double cpuUsage = 0.0;
                    double memoryUsage = 0.0;
                    int activeConn = 0;
                    int idleConn = 0;
                    int lockedConn = 0;
                    long xactCommit = 0;
                    long xactRollback = 0;
                    double cacheHitRatio = 0.0;
                    String longTransactionJson = "";

                    // WAL & Checkpoint
                    try (ResultSet rs = pgConn.prepareStatement(
                            "SELECT checkpoints_timed, checkpoints_req FROM pg_stat_bgwriter"
                    ).executeQuery()) {
                        if (rs.next()) {
                            checkpointsTimed = rs.getLong("checkpoints_timed");
                            checkpointsReq = rs.getLong("checkpoints_req");
                        }
                    }

                    // WAL Buffers Full
                    try (ResultSet rs = pgConn.prepareStatement(
                            "SELECT wal_buffers_full FROM pg_stat_wal"
                    ).executeQuery()) {
                        if (rs.next()) {
                            walBuffersFull = rs.getLong("wal_buffers_full");
                        }
                    }

                    // "CPU" / "memory" proxies (verified on PostgreSQL 15).
                    // cpu_usage is the share of active sessions in pg_stat_activity (a load proxy, not OS CPU);
                    // memory_usage is the configured shared_buffers size in GB.
                    // pg_settings reports shared_buffers in 8 kB blocks, hence the * 8192.
                    try (ResultSet rs = pgConn.prepareStatement(
                            "SELECT " +
                                    "ROUND( " +
                                    "    (COUNT(*) FILTER (WHERE state = 'active'))::NUMERIC / COUNT(*) * 100, " +
                                    "    2 " +
                                    ") AS cpu_usage, " +
                                    "ROUND( " +
                                    "    (SELECT setting::NUMERIC * 8192 FROM pg_settings WHERE name = 'shared_buffers') / 1024 / 1024 / 1024, " +
                                    "    2 " +
                                    ") AS memory_usage " +
                                    "FROM pg_stat_activity " +
                                    "LIMIT 1"
                    ).executeQuery()) {
                        if (rs.next()) {
                            cpuUsage = rs.getDouble("cpu_usage");
                            memoryUsage = rs.getDouble("memory_usage");
                        }
                    }


                    // Connections
                    try (ResultSet rs = pgConn.prepareStatement(
                            "SELECT " +
                                    "COUNT(*) FILTER (WHERE state = 'active') AS active, " +
                                    "COUNT(*) FILTER (WHERE state IN ('idle','idle in transaction')) AS idle, " +
                                    "COUNT(*) FILTER (WHERE wait_event_type = 'LOCK') AS locked " +
                                    "FROM pg_stat_activity"
                    ).executeQuery()) {
                        if (rs.next()) {
                            activeConn = rs.getInt("active");
                            idleConn = rs.getInt("idle");
                            lockedConn = rs.getInt("locked");
                        }
                    }

                    // Transaction & Cache
                    try (ResultSet rs = pgConn.prepareStatement(
                            "SELECT " +
                                    "xact_commit, " +
                                    "xact_rollback, " +
                                    "(blks_hit::FLOAT/(blks_hit + blks_read))*100 AS cache_hit_ratio " +
                                    "FROM pg_stat_database WHERE datname = current_database()"
                    ).executeQuery()) {
                        if (rs.next()) {
                            xactCommit = rs.getLong("xact_commit");
                            xactRollback = rs.getLong("xact_rollback");
                            cacheHitRatio = rs.getDouble("cache_hit_ratio");
                        }
                    }

                    // Collect transactions in PostgreSQL that have been open for more than 3 minutes
                    List<LongTransaction> longTransactions = new ArrayList<>();
                    try (PreparedStatement pstmt = pgConn.prepareStatement(
                            "SELECT " +
                            "pid AS transaction_id, " +
                            "EXTRACT(EPOCH FROM (NOW() - xact_start)) AS duration_seconds, " +
                            "query AS sql, " +
                            "state " +
                            "FROM pg_stat_activity " +
                            "WHERE " +
                            "xact_start IS NOT NULL " +                           // session has an open transaction
                            "AND EXTRACT(EPOCH FROM (NOW() - xact_start)) > ? " + // transaction duration above the threshold
                            "AND state <> 'idle'")) {
                        pstmt.setLong(1, 180); // threshold in seconds (3 minutes)
                        try (ResultSet rs = pstmt.executeQuery()) {
                            while (rs.next()) {
                                LongTransaction transaction = new LongTransaction();
                                transaction.setTransactionId(rs.getString("transaction_id"));
                                transaction.setDurationSeconds(rs.getLong("duration_seconds"));
                                transaction.setSql(rs.getString("sql"));
                                transaction.setState(rs.getString("state"));
                                longTransactions.add(transaction);
                            }
                        }
                        // Serialize once, after the result set has been consumed
                        longTransactionJson = JSON.toJSONString(longTransactions);
                    }

                    // --------------------------
                    // 2. CDC health analysis
                    // --------------------------

                    CdcHealthResult health = CdcHealthResult.analyze(
                            dbName,
                            walBuffersFull,
                            checkpointsReq,
                            lockedConn,
                            cpuUsage,
                            cacheHitRatio,
                            activeConn
                    );

                    // --------------------------
                    // 3. Write the result to Doris
                    // --------------------------

                    try (Connection dorisConn = DriverManager.getConnection(DORIS_URL, DORIS_USER, DORIS_PASSWORD)) {

                        String sql = "INSERT INTO t_pg_log (" +
                                "id, " +
                                "database_name, " +
                                "checkpoints_timed, checkpoints_req, wal_buffers_full, " +
                                "cpu_usage, memory_usage, active_connections, idle_connections, locked_connections, " +
                                "xact_commit, xact_rollback, cache_hit_ratio, " +
                                "cdc_health_level, cdc_health_reason, cdc_health_suggestion, longTransactionJson, create_time" +
                                ") VALUES (?, ?, ?,?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

                        try (PreparedStatement pstmt = dorisConn.prepareStatement(sql)) {
                            pstmt.setString(1, UUID.randomUUID().toString().replace("-",""));
                            pstmt.setString(2, dbName);
                            pstmt.setLong(3, checkpointsTimed);
                            pstmt.setLong(4, checkpointsReq);
                            pstmt.setLong(5, walBuffersFull);
                            pstmt.setDouble(6, cpuUsage);
                            pstmt.setDouble(7, memoryUsage);
                            pstmt.setInt(8, activeConn);
                            pstmt.setInt(9, idleConn);
                            pstmt.setInt(10, lockedConn);
                            pstmt.setLong(11, xactCommit);
                            pstmt.setLong(12, xactRollback);
                            pstmt.setDouble(13, cacheHitRatio);
                            pstmt.setString(14, health.level);
                            pstmt.setString(15, health.reason);
                            pstmt.setString(16, health.suggestion);
                            pstmt.setString(17, longTransactionJson);
                            pstmt.setTimestamp(18, new Timestamp(System.currentTimeMillis()));

                            pstmt.executeUpdate();
                            log.info("[{}] 数据库 {} 写入 Doris 成功:CDC 健康度={}",
                                    new Date(), dbName, health.level);
                        }
                    }

                } catch (SQLException e) {
                    log.error("[{}] 数据库 {} 采集失败: {}", new Date(), dbName, e.getMessage());
                }
            }
        }
    }
}
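The connection constants above are intentionally left blank. As an optional variant (not part of the original program), they could be resolved from environment variables so the fat jar does not need to be rebuilt per environment. A minimal sketch, with illustrative variable names such as PG_MONITOR_HOST:

package com.test.cdc;

// Minimal sketch: resolve connection settings from environment variables with a fallback,
// e.g. export PG_MONITOR_HOST=10.0.0.1 before starting the jar. All names here are illustrative.
public class MonitorConfig {

    // Returns the environment value if present and non-empty, otherwise the given default.
    static String envOrDefault(String name, String defaultValue) {
        String value = System.getenv(name);
        return (value == null || value.isEmpty()) ? defaultValue : value;
    }

    public static final String PG_HOST = envOrDefault("PG_MONITOR_HOST", "localhost");
    public static final int PG_PORT = Integer.parseInt(envOrDefault("PG_MONITOR_PORT", "5432"));
    public static final String PG_USER = envOrDefault("PG_MONITOR_USER", "postgres");
    public static final String PG_PASSWORD = envOrDefault("PG_MONITOR_PASSWORD", "");
}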

3. Health analysis and optimization suggestions

package com.test.cdc;

public class CdcHealthResult {
    public String level;
    public String reason;
    public String suggestion;

    public CdcHealthResult(String level, String reason, String suggestion) {
        this.level = level;
        this.reason = reason;
        this.suggestion = suggestion;
    }
    public static CdcHealthResult analyze(
            String dbName,
            long walBuffersFull,
            long checkpointsReq,
            int lockedConn,
            double cpuUsage,
            double cacheHitRatio,
            int activeConn
    ) {

        // Per-interval delta via MetricCache (returns -1 on the first collection cycle)
        long walBuffersFullIncrement = MetricCache.getInstance()
                .getIncrement(dbName, "wal_buffers_full", walBuffersFull);
        // ALERT
        if (walBuffersFullIncrement > 100) {
            return new CdcHealthResult(
                    "ALERT",
                    "WAL buffers filled up frequently; CDC will be throttled and latency will rise",
                    "Increase wal_buffers, improve disk IO, avoid large transactions"
            );
        }

        if (lockedConn > 5) {
            return new CdcHealthResult(
                    "ALERT",
                    "锁等待过多(" + lockedConn + "),事务提交变慢,CDC 延迟会上升",
                    "检查慢 SQL、长事务、锁竞争"
            );
        }
        long checkpointsReqIncrement = MetricCache.getInstance()
                .getIncrement(dbName, "checkpoints_req", checkpointsReq);
        // WARNING
        if (checkpointsReqIncrement > 10 || cpuUsage > 80) {
            return new CdcHealthResult(
                    "WARNING",
                    "checkpoint 频繁或 CPU 过高,可能影响 CDC 性能",
                    "增大 shared_buffers、优化 SQL、降低写入压力"
            );
        }

        if (cacheHitRatio < 95) {
            return new CdcHealthResult(
                    "WARNING",
                    "缓存命中率低(" + String.format("%.2f", cacheHitRatio) + "%),磁盘 IO 升高",
                    "增大 shared_buffers、优化索引"
            );
        }

        if (activeConn > 200) {
            return new CdcHealthResult(
                    "WARNING",
                    "活跃连接过多(" + activeConn + "),PostgreSQL 压力增大",
                    "优化连接池、减少长连接"
            );
        }
        // HEALTHY
        return new CdcHealthResult(
                "HEALTHY",
                "PostgreSQL 状态良好,CDC 运行稳定",
                "继续保持当前配置"
        );

    }
}
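To see how the rules interact, here is a small made-up check (all metric values invented): on the first cycle for a database the WAL delta from MetricCache is -1, so the lock-wait rule is the first ALERT that can actually fire; once every rule passes, the result falls through to HEALTHY.

package com.test.cdc;

// Minimal sketch exercising CdcHealthResult.analyze() with made-up metric values.
public class CdcHealthResultDemo {
    public static void main(String[] args) {
        // First cycle: wal_buffers_full delta is -1 (no history yet), but 8 lock waiters trigger an ALERT.
        CdcHealthResult locked = CdcHealthResult.analyze("demoDb", 500, 3, 8, 35.0, 99.2, 50);
        System.out.println(locked.level + " -> " + locked.reason);   // ALERT

        // Healthy-looking metrics fall through every rule and return HEALTHY.
        CdcHealthResult ok = CdcHealthResult.analyze("demoDb", 510, 3, 0, 35.0, 99.2, 50);
        System.out.println(ok.level);                                // HEALTHY
    }
}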

4. Per-interval delta calculation for selected metrics

package com.test.cdc;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Caches the metric values from the previous collection cycle; singleton
public class MetricCache {
    private static final MetricCache INSTANCE = new MetricCache();
    // Previous metrics per database (key: database name, value: metric name -> last value)
    private final Map<String, Map<String, Long>> historyMetrics = new ConcurrentHashMap<>();

    private MetricCache() {}

    public static MetricCache getInstance() {
        return INSTANCE;
    }

    // Stores the current value and returns the delta since the previous collection
    public long getIncrement(String dbName, String metricName, long currentValue) {
        // Get (or create) the metric map for this database
        Map<String, Long> dbMetrics = historyMetrics.computeIfAbsent(dbName, k -> new HashMap<>());
        if (!dbMetrics.containsKey(metricName)) {
            dbMetrics.put(metricName, currentValue); // cache the current value as the baseline
            return -1; // first collection: no delta can be computed yet
        }
        // Delta against the previous collection cycle
        long lastValue = dbMetrics.getOrDefault(metricName, 0L);
        long increment = currentValue - lastValue;

        // Update the cached value
        dbMetrics.put(metricName, currentValue);
        return increment;
    }
}
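The delta semantics matter for the thresholds above: the first call for a given (database, metric) pair only seeds the cache and returns -1, and every later call returns the change since the previous cycle. A minimal illustration with made-up counter values:

package com.test.cdc;

// Minimal illustration of MetricCache delta semantics, using made-up counter values.
public class MetricCacheDemo {
    public static void main(String[] args) {
        MetricCache cache = MetricCache.getInstance();

        // Cycle 1: no history yet, the call only seeds the cache and returns -1.
        System.out.println(cache.getIncrement("dbName1", "wal_buffers_full", 1200)); // -1

        // Cycle 2: the counter grew from 1200 to 1350, so the delta is 150.
        System.out.println(cache.getIncrement("dbName1", "wal_buffers_full", 1350)); // 150

        // Different databases are tracked independently.
        System.out.println(cache.getIncrement("dbName2", "wal_buffers_full", 80));   // -1
    }
}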

5. A transaction class for recording long-running SQL

package com.test.cdc;

import lombok.Data;

@Data
public class LongTransaction {
    // Transaction identifier (backend pid from pg_stat_activity)
    private String transactionId;
    // Transaction duration in seconds
    private long durationSeconds;
    // SQL statement currently or last executed by the transaction
    private String sql;
    // Session state
    private String state;
}
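For reference, the longTransactionJson column in Doris holds the fastjson serialization of a List<LongTransaction>. A minimal sketch of what one serialized entry looks like (all values invented; exact field order depends on the fastjson version):

package com.test.cdc;

import com.alibaba.fastjson.JSON;
import java.util.Collections;

// Minimal sketch of the JSON stored in the longTransactionJson column (field values are invented).
public class LongTransactionJsonDemo {
    public static void main(String[] args) {
        LongTransaction tx = new LongTransaction();
        tx.setTransactionId("24017");
        tx.setDurationSeconds(245);
        tx.setSql("UPDATE t_order SET status = 2 WHERE ...");
        tx.setState("active");

        // Prints a JSON array such as
        // [{"durationSeconds":245,"sql":"UPDATE t_order SET status = 2 WHERE ...","state":"active","transactionId":"24017"}]
        // (field order may differ depending on the fastjson version)
        System.out.println(JSON.toJSONString(Collections.singletonList(tx)));
    }
}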

6. Doris table DDL: a log table for the collected metrics

CREATE TABLE t_pg_log (
    `id` VARCHAR(200) COMMENT 'Unique log id',
    `database_name` VARCHAR(150) COMMENT 'Database name',
    `checkpoints_timed` BIGINT COMMENT 'Scheduled checkpoints',
    `checkpoints_req` BIGINT COMMENT 'Requested checkpoints',
    `wal_buffers_full` BIGINT COMMENT 'Times the WAL buffers filled up',
    `cpu_usage` DOUBLE COMMENT 'Active-session ratio used as a CPU proxy (%)',
    `memory_usage` DOUBLE COMMENT 'shared_buffers size (GB)',
    `active_connections` INT COMMENT 'Active connections',
    `idle_connections` INT COMMENT 'Idle connections',
    `locked_connections` INT COMMENT 'Connections waiting on locks',
    `xact_commit` BIGINT COMMENT 'Committed transactions',
    `xact_rollback` BIGINT COMMENT 'Rolled-back transactions',
    `cache_hit_ratio` DOUBLE COMMENT 'Buffer cache hit ratio (%)',
    `cdc_health_level` VARCHAR(20) COMMENT 'CDC health level (HEALTHY/WARNING/ALERT)',
    `cdc_health_reason` VARCHAR(500) COMMENT 'Reason for the CDC health verdict',
    `cdc_health_suggestion` VARCHAR(500) COMMENT 'Suggested optimization',
    `longTransactionJson` STRING COMMENT 'Long transactions as JSON',
    `create_time` DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT 'Record creation time'
)
UNIQUE KEY(`id`)
COMMENT 'PostgreSQL monitoring log table'
DISTRIBUTED BY HASH(`id`)
PROPERTIES (
    "replication_allocation" = "tag.location.default: 1"
);
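Once the job has been running for a while, this table can back a dashboard or a simple alert query. A minimal sketch (the time window, LIMIT and credentials are illustrative) that pulls recent non-HEALTHY records over the MySQL protocol:

package com.test.cdc;

import java.sql.*;

// Minimal sketch: pull the most recent non-HEALTHY records from t_pg_log for alerting/dashboards.
// The Doris connection settings mirror the ones used by the monitor; adjust to your environment.
public class PgLogAlertQuery {
    public static void main(String[] args) throws SQLException {
        String url = "jdbc:mysql://ip:9030/ods";
        String sql = "SELECT database_name, cdc_health_level, cdc_health_reason, create_time " +
                     "FROM t_pg_log " +
                     "WHERE cdc_health_level <> 'HEALTHY' " +
                     "AND create_time >= DATE_SUB(NOW(), INTERVAL 1 DAY) " +
                     "ORDER BY create_time DESC " +
                     "LIMIT 50";
        try (Connection conn = DriverManager.getConnection(url, "user", "password");
             PreparedStatement pstmt = conn.prepareStatement(sql);
             ResultSet rs = pstmt.executeQuery()) {
            while (rs.next()) {
                System.out.printf("%s | %s | %s | %s%n",
                        rs.getTimestamp("create_time"),
                        rs.getString("database_name"),
                        rs.getString("cdc_health_level"),
                        rs.getString("cdc_health_reason"));
            }
        }
    }
}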

7. logback.xml for log output

<?xml version="1.0" encoding="UTF-8"?>
<configuration scan="true" scanPeriod="60 seconds">
    <!-- Relative path: a logs directory next to the jar; no absolute path needed -->
    <property name="LOG_BASE_PATH" value="logs" />
    <property name="LOG_FILE_NAME" value="pg-cdc-monitor" />
    <property name="MAX_HISTORY" value="30" />
    <property name="FILE_ENCODING" value="UTF-8" />

    <!-- Console output (optional; comment out for server deployments) -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>${FILE_ENCODING}</charset>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>INFO</level>
        </filter>
    </appender>

    <!-- File output: rolling log files under the relative path -->
    <appender name="FILE_ROLLING" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_BASE_PATH}/${LOG_FILE_NAME}.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOG_BASE_PATH}/${LOG_FILE_NAME}.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxHistory>${MAX_HISTORY}</maxHistory>
            <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
                <maxFileSize>200MB</maxFileSize>
            </timeBasedFileNamingAndTriggeringPolicy>
        </rollingPolicy>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>${FILE_ENCODING}</charset>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>INFO</level>
        </filter>
    </appender>

    <!-- Separate appender for ERROR-level logs -->
    <appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_BASE_PATH}/${LOG_FILE_NAME}-error.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOG_BASE_PATH}/${LOG_FILE_NAME}-error.%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>${MAX_HISTORY}</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>${FILE_ENCODING}</charset>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.LevelFilter">
            <level>ERROR</level>
            <onMatch>ACCEPT</onMatch>
            <onMismatch>DENY</onMismatch>
        </filter>
    </appender>

    <root level="INFO">
        <appender-ref ref="FILE_ROLLING" />
        <appender-ref ref="ERROR_FILE" />
         <appender-ref ref="CONSOLE" />
    </root>

    <!-- Quiet down third-party library logging -->
    <logger name="org.postgresql" level="WARN" />
    <logger name="com.mysql.cj" level="WARN" />
    <logger name="java.sql" level="WARN" />
</configuration>