1. pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.sunwoda</groupId>
<artifactId>pg-test</artifactId>
<version>2.0-SNAPSHOT</version>
<properties>
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<fastjson.version>2.0.52</fastjson.version>
<postgresql.version>42.2.12</postgresql.version>
<mysql.version>8.0.33</mysql.version>
<logback.version>1.2.11</logback.version>
<lombok.version>1.18.20</lombok.version>
</properties>
<dependencies>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>${postgresql.version}</version>
</dependency>
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>${mysql.version}</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>${logback.version}</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
<version>${logback.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>com.test.cdc.PgCdcUltimateMonitor</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
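With the assembly plugin configured above, mvn clean package produces target/pg-test-2.0-SNAPSHOT-jar-with-dependencies.jar, and the manifest main class lets you start it directly with java -jar.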
2. Java main class
package com.test.cdc;
import com.alibaba.fastjson.JSON; // fastjson is the declared dependency; druid is not on the classpath
import lombok.extern.slf4j.Slf4j;
import java.sql.*;
import java.util.*;
import java.util.Date;
@Slf4j
public class PgCdcUltimateMonitor {
// PostgreSQL configuration (multiple databases)
private static final String PG_HOST = "";
private static final int PG_PORT = 5432;
private static final String PG_USER = "";
private static final String PG_PASSWORD = "";
private static final List<String> PG_DATABASES = Arrays.asList(
"dbName1",
"dbName2",
"dbName3"
);
// Doris configuration
private static final String DORIS_URL = "jdbc:mysql://ip:9030/ods";
private static final String DORIS_USER = "";
private static final String DORIS_PASSWORD = "";
// Collection interval (5 minutes)
private static final long INTERVAL = 5 * 60 * 1000L;
static {
try {
Class.forName("com.mysql.cj.jdbc.Driver");
} catch (ClassNotFoundException e) {
log.error("找不到 MySQL 驱动:{}", e.getMessage());
}
}
public static void main(String[] args) {
new PgCdcUltimateMonitor().start();
}
public void start() {
log.info("===== PostgreSQL CDC 终极监控程序启动 =====");
new Timer().scheduleAtFixedRate(new MonitorTask(), 0, INTERVAL);
}
static class MonitorTask extends TimerTask {
@Override
public void run() {
// Iterate over all configured databases
for (String dbName : PG_DATABASES) {
String pgUrl = String.format("jdbc:postgresql://%s:%d/%s", PG_HOST, PG_PORT, dbName);
try (Connection pgConn = DriverManager.getConnection(pgUrl, PG_USER, PG_PASSWORD)) {
log.info("[{}] 开始采集数据库: {}", new Date(), dbName);
// --------------------------
// 1. Collect PostgreSQL metrics
// --------------------------
long checkpointsTimed = 0;
long checkpointsReq = 0;
long walBuffersFull = 0;
double cpuUsage = 0.0;
double memoryUsage = 0.0;
int activeConn = 0;
int idleConn = 0;
int lockedConn = 0;
long xactCommit = 0;
long xactRollback = 0;
double cacheHitRatio = 0.0;
String longTransactionJson = "";
// WAL & Checkpoint
try (ResultSet rs = pgConn.prepareStatement(
"SELECT checkpoints_timed, checkpoints_req FROM pg_stat_bgwriter"
).executeQuery()) {
if (rs.next()) {
checkpointsTimed = rs.getLong("checkpoints_timed");
checkpointsReq = rs.getLong("checkpoints_req");
}
}
// WAL Buffers Full
try (ResultSet rs = pgConn.prepareStatement(
"SELECT wal_buffers_full FROM pg_stat_wal"
).executeQuery()) {
if (rs.next()) {
walBuffersFull = rs.getLong("wal_buffers_full");
}
}
// CPU / memory proxies (verified on PostgreSQL 15): the share of active
// sessions stands in for CPU load; shared_buffers is reported in GB
try (ResultSet rs = pgConn.prepareStatement(
"SELECT " +
"ROUND( " +
" (COUNT(*) FILTER (WHERE state = 'active'))::NUMERIC / COUNT(*) * 100, " +
" 2 " +
") AS cpu_usage, " +
// pg_settings stores shared_buffers in 8 kB blocks; convert blocks -> bytes -> GB
"ROUND( " +
" (SELECT setting::NUMERIC * 8192 FROM pg_settings WHERE name = 'shared_buffers') / 1024 / 1024 / 1024, " +
" 2 " +
") AS memory_usage " +
"FROM pg_stat_activity " +
"LIMIT 1"
).executeQuery()) {
if (rs.next()) {
cpuUsage = rs.getDouble("cpu_usage");
memoryUsage = rs.getDouble("memory_usage");
}
}
// Connections
try (ResultSet rs = pgConn.prepareStatement(
"SELECT " +
"COUNT(*) FILTER (WHERE state = 'active') AS active, " +
"COUNT(*) FILTER (WHERE state IN ('idle','idle in transaction')) AS idle, " +
"COUNT(*) FILTER (WHERE wait_event_type = 'LOCK') AS locked " +
"FROM pg_stat_activity"
).executeQuery()) {
if (rs.next()) {
activeConn = rs.getInt("active");
idleConn = rs.getInt("idle");
lockedConn = rs.getInt("locked");
}
}
// Transaction & Cache
try (ResultSet rs = pgConn.prepareStatement(
"SELECT " +
"xact_commit, " +
"xact_rollback, " +
"(blks_hit::FLOAT/(blks_hit + blks_read))*100 AS cache_hit_ratio " +
"FROM pg_stat_database WHERE datname = current_database()"
).executeQuery()) {
if (rs.next()) {
xactCommit = rs.getLong("xact_commit");
xactRollback = rs.getLong("xact_rollback");
cacheHitRatio = rs.getDouble("cache_hit_ratio");
}
}
// Collect PostgreSQL transactions that have been running for more than 3 minutes
List<LongTransaction> longTransactions = new ArrayList<>();
try (PreparedStatement pstmt = pgConn.prepareStatement(
"SELECT " +
"pid AS transaction_id, " +
"EXTRACT(EPOCH FROM (NOW() - xact_start)) AS duration_seconds, " +
"query AS sql, " +
"state " +
"FROM pg_stat_activity " +
"WHERE " +
"xact_start IS NOT NULL " + // 存在活跃事务
"AND EXTRACT(EPOCH FROM (NOW() - xact_start)) > ? " + // 事务时长超阈值
"AND state <> 'idle'")) {
pstmt.setLong(1, 180);
try (ResultSet rs = pstmt.executeQuery()) {
while (rs.next()) {
LongTransaction transaction = new LongTransaction();
transaction.setTransactionId(rs.getString("transaction_id"));
transaction.setDurationSeconds(rs.getLong("duration_seconds"));
transaction.setSql(rs.getString("sql"));
transaction.setState(rs.getString("state"));
longTransactions.add(transaction);
}
}
}
// Serialize once, after all rows have been read
longTransactionJson = JSON.toJSONString(longTransactions);
// --------------------------
// 2. CDC health analysis
// --------------------------
CdcHealthResult health = CdcHealthResult.analyze(
dbName,
walBuffersFull,
checkpointsReq,
lockedConn,
cpuUsage,
cacheHitRatio,
activeConn
);
// --------------------------
// 3. Write to Doris
// --------------------------
try (Connection dorisConn = DriverManager.getConnection(DORIS_URL, DORIS_USER, DORIS_PASSWORD)) {
String sql = "INSERT INTO t_pg_log (" +
"id, " +
"database_name, " +
"checkpoints_timed, checkpoints_req, wal_buffers_full, " +
"cpu_usage, memory_usage, active_connections, idle_connections, locked_connections, " +
"xact_commit, xact_rollback, cache_hit_ratio, " +
"cdc_health_level, cdc_health_reason, cdc_health_suggestion, longTransactionJson, create_time" +
") VALUES (?, ?, ?,?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
try (PreparedStatement pstmt = dorisConn.prepareStatement(sql)) {
pstmt.setString(1, UUID.randomUUID().toString().replace("-",""));
pstmt.setString(2, dbName);
pstmt.setLong(3, checkpointsTimed);
pstmt.setLong(4, checkpointsReq);
pstmt.setLong(5, walBuffersFull);
pstmt.setDouble(6, cpuUsage);
pstmt.setDouble(7, memoryUsage);
pstmt.setInt(8, activeConn);
pstmt.setInt(9, idleConn);
pstmt.setInt(10, lockedConn);
pstmt.setLong(11, xactCommit);
pstmt.setLong(12, xactRollback);
pstmt.setDouble(13, cacheHitRatio);
pstmt.setString(14, health.level);
pstmt.setString(15, health.reason);
pstmt.setString(16, health.suggestion);
pstmt.setString(17, longTransactionJson);
pstmt.setTimestamp(18, new Timestamp(System.currentTimeMillis()));
pstmt.executeUpdate();
log.info("[{}] 数据库 {} 写入 Doris 成功:CDC 健康度={}",
new Date(), dbName, health.level);
}
}
} catch (SQLException e) {
log.error("[{}] 数据库 {} 采集失败: {}", new Date(), dbName, e.getMessage());
}
}
}
}
}
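Note that java.util.Timer runs MonitorTask on a single non-daemon thread, which is why the process keeps running after main() returns. SQLExceptions are caught per database, so one unreachable database does not stop collection from the others; an uncaught RuntimeException, however, would terminate the Timer thread.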
3. Outputting optimization suggestions
package com.test.cdc;
public class CdcHealthResult {
public String level;
public String reason;
public String suggestion;
public CdcHealthResult(String level, String reason, String suggestion) {
this.level = level;
this.reason = reason;
this.suggestion = suggestion;
}
public static CdcHealthResult analyze(
String dbName,
long walBuffersFull,
long checkpointsReq,
int lockedConn,
double cpuUsage,
double cacheHitRatio,
int activeConn
) {
// Compute the per-cycle increment (via MetricCache)
long walBuffersFullIncrement = MetricCache.getInstance()
.getIncrement(dbName, "wal_buffers_full", walBuffersFull);
// ALERT
if (walBuffersFullIncrement > 100) {
return new CdcHealthResult(
"ALERT",
"WAL 缓冲区写满,CDC 会被阻塞,延迟必然上升",
"增大 wal_buffers、提升磁盘 IO、减少大事务"
);
}
if (lockedConn > 5) {
return new CdcHealthResult(
"ALERT",
"锁等待过多(" + lockedConn + "),事务提交变慢,CDC 延迟会上升",
"检查慢 SQL、长事务、锁竞争"
);
}
long checkpointsReqIncrement = MetricCache.getInstance()
.getIncrement(dbName, "checkpoints_req", checkpointsReq);
// WARNING
if (checkpointsReqIncrement > 10 || cpuUsage > 80) {
return new CdcHealthResult(
"WARNING",
"checkpoint 频繁或 CPU 过高,可能影响 CDC 性能",
"增大 shared_buffers、优化 SQL、降低写入压力"
);
}
if (cacheHitRatio < 95) {
return new CdcHealthResult(
"WARNING",
"缓存命中率低(" + String.format("%.2f", cacheHitRatio) + "%),磁盘 IO 升高",
"增大 shared_buffers、优化索引"
);
}
if (activeConn > 200) {
return new CdcHealthResult(
"WARNING",
"活跃连接过多(" + activeConn + "),PostgreSQL 压力增大",
"优化连接池、减少长连接"
);
}
// HEALTHY
return new CdcHealthResult(
"HEALTHY",
"PostgreSQL 状态良好,CDC 运行稳定",
"继续保持当前配置"
);
}
}
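The rules are checked in priority order: ALERT conditions first, then WARNING, with HEALTHY as the fallback. Because MetricCache returns -1 for the first sample of a metric (see section 4), the increment-based rules cannot fire before the second collection cycle. A minimal sketch with hypothetical values:
CdcHealthResult r1 = CdcHealthResult.analyze("db1", 500, 20, 0, 10.0, 99.0, 5);
// r1.level == "HEALTHY": both increments are -1 on the first sample
CdcHealthResult r2 = CdcHealthResult.analyze("db1", 700, 25, 0, 10.0, 99.0, 5);
// r2.level == "ALERT": wal_buffers_full grew by 200 (> 100) in one cycle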
4. Incremental calculation for selected metrics
package com.test.cdc;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
// Cache of historical metric values; singleton
public class MetricCache {
private static final MetricCache INSTANCE = new MetricCache();
// Historical metrics per database (key: database name, value: metric map)
private final Map<String, Map<String, Long>> historyMetrics = new ConcurrentHashMap<>();
private MetricCache() {}
public static MetricCache getInstance() {
return INSTANCE;
}
// Store the current value and return the increment since the last sample
public long getIncrement(String dbName, String metricName, long currentValue) {
// Get or create the metric map for this database
Map<String, Long> dbMetrics = historyMetrics.computeIfAbsent(dbName, k -> new HashMap<>());
if (!dbMetrics.containsKey(metricName)) {
dbMetrics.put(metricName, currentValue); // cache the current value as the next baseline
return -1; // first sample: no increment can be computed yet
}
// Compute the increment against the previous sample
long lastValue = dbMetrics.get(metricName);
long increment = currentValue - lastValue;
// Update the baseline
dbMetrics.put(metricName, currentValue);
return increment;
}
}
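A quick sketch of the contract, again with hypothetical values: the first call for a (database, metric) pair only seeds the cache and returns -1; each later call returns the delta against the cached value and then updates it.
MetricCache cache = MetricCache.getInstance();
long first = cache.getIncrement("db2", "checkpoints_req", 100); // -1: first sample only seeds the cache
long next = cache.getIncrement("db2", "checkpoints_req", 112); // 12: delta since the previous sample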
5. A transaction class for recording long-running SQL
package com.test.cdc;
import lombok.Data;
@Data
public class LongTransaction {
// Transaction ID (the backend pid, per the query in section 2)
private String transactionId;
// Transaction duration in seconds
private long durationSeconds;
// SQL statement the transaction is executing
private String sql;
// Transaction state
private String state;
}
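For reference, fastjson serializes a list of these beans into the longTransactionJson column as something like the following (illustrative values; field order may vary):
[{"durationSeconds":245,"sql":"UPDATE ...","state":"active","transactionId":"12345"}]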
6. Doris table creation: a log table for the metric results
CREATE TABLE t_pg_log (
`id` VARCHAR(200) COMMENT 'Unique log id',
`database_name` VARCHAR(150) COMMENT 'Database name',
`checkpoints_timed` BIGINT COMMENT 'Timed checkpoints',
`checkpoints_req` BIGINT COMMENT 'Requested checkpoints',
`wal_buffers_full` BIGINT COMMENT 'Times the WAL buffers filled up',
`cpu_usage` DOUBLE COMMENT 'CPU usage proxy (%)',
`memory_usage` DOUBLE COMMENT 'Memory (shared_buffers, GB)',
`active_connections` INT COMMENT 'Active connections',
`idle_connections` INT COMMENT 'Idle connections',
`locked_connections` INT COMMENT 'Connections waiting on locks',
`xact_commit` BIGINT COMMENT 'Transactions committed',
`xact_rollback` BIGINT COMMENT 'Transactions rolled back',
`cache_hit_ratio` DOUBLE COMMENT 'Cache hit ratio (%)',
`cdc_health_level` VARCHAR(20) COMMENT 'CDC health level (HEALTHY/WARNING/ALERT)',
`cdc_health_reason` VARCHAR(500) COMMENT 'Reason for the CDC health verdict',
`cdc_health_suggestion` VARCHAR(500) COMMENT 'CDC tuning suggestion',
`longTransactionJson` STRING COMMENT 'Long-transaction details as JSON',
`create_time` DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT 'Record creation time'
)
UNIQUE KEY(`id`)
COMMENT 'PostgreSQL monitoring log table'
DISTRIBUTED BY HASH(`id`)
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
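Because the table uses the UNIQUE KEY model on id and the program generates a fresh random UUID per row, every insert lands as a new record; writes that reuse an existing id would overwrite (upsert) that row.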
7. Add a logback.xml for log output
<?xml version="1.0" encoding="UTF-8"?>
<configuration scan="true" scanPeriod="60 seconds">
<!-- Relative path: a logs directory next to the jar -->
<property name="LOG_BASE_PATH" value="logs" /> <!-- no absolute path needed, just the directory name -->
<property name="LOG_FILE_NAME" value="pg-cdc-monitor" />
<property name="MAX_HISTORY" value="30" />
<property name="FILE_ENCODING" value="UTF-8" />
<!-- Console output (optional; comment out for server deployments) -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>${FILE_ENCODING}</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
</appender>
<!-- File output: time/size-based rolling under the relative path -->
<appender name="FILE_ROLLING" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_BASE_PATH}/${LOG_FILE_NAME}.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_BASE_PATH}/${LOG_FILE_NAME}.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<maxHistory>${MAX_HISTORY}</maxHistory>
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>200MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>${FILE_ENCODING}</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
</appender>
<!-- Errors additionally go to a separate file (relative path) -->
<appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_BASE_PATH}/${LOG_FILE_NAME}-error.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_BASE_PATH}/${LOG_FILE_NAME}-error.%d{yyyy-MM-dd}.log</fileNamePattern>
<maxHistory>${MAX_HISTORY}</maxHistory>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>${FILE_ENCODING}</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>ERROR</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
</appender>
<root level="INFO">
<appender-ref ref="FILE_ROLLING" />
<appender-ref ref="ERROR_FILE" />
<appender-ref ref="CONSOLE" />
</root>
<!-- Tame third-party library logging -->
<logger name="org.postgresql" level="WARN" />
<logger name="com.mysql.cj" level="WARN" />
<logger name="java.sql" level="WARN" />
</configuration>
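Place logback.xml under src/main/resources so it ends up on the classpath inside the fat jar; the relative logs directory is then created under whatever working directory the jar is launched from.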