-- ============================================================
-- Oracle RAC 日常监控脚本
-- 适用版本: Oracle 11gR2 / 12c / 19c RAC
-- 执行用户: SYS / SYSTEM / 具备DBA权限账号
-- ============================================================
-- Output formatting: wide lines, long pages, no timing and no row-count feedback.
SET LINESIZE 220 PAGESIZE 100
SET TIMING OFF FEEDBACK OFF
COLUMN dummy NOPRINT
-- A failing query must not abort the rest of the report.
WHENEVER SQLERROR CONTINUE
PROMPT
PROMPT ╔══════════════════════════════════════════════════════╗
PROMPT ║ Oracle RAC 日常巡检报告 ║
PROMPT ╚══════════════════════════════════════════════════════╝
PROMPT 检查时间:
-- Report timestamp, taken from the database server clock.
SELECT TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') AS CHECK_TIME
  FROM DUAL;
-- ============================================================
-- [1] Cluster instance status
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 1. 集群节点状态 (GV$INSTANCE) │
PROMPT └─────────────────────────────────────┘
-- One row per instance returned by GV$INSTANCE: identity, open state,
-- startup time, redo thread number and archiver state.
SELECT gi.INST_ID                                AS "节点"
     , gi.INSTANCE_NAME                          AS "实例名"
     , gi.HOST_NAME                              AS "主机名"
     , gi.STATUS                                 AS "状态"
     , gi.DATABASE_STATUS                        AS "DB状态"
     , gi.ACTIVE_STATE                           AS "活动状态"
     , TO_CHAR(gi.STARTUP_TIME, 'MM-DD HH24:MI') AS "启动时间"
     , gi.THREAD#                                AS "线程号"
     , gi.ARCHIVER                               AS "归档进程"
  FROM GV$INSTANCE gi
 ORDER BY gi.INST_ID;
-- ============================================================
-- [2] Per-node resource usage (sessions / processes)
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 2. 节点资源概览 │
PROMPT └─────────────────────────────────────┘
-- Session and process utilisation per instance, read from GV$RESOURCE_LIMIT.
-- GV$SYSSTAT carries no 'sessions current' / 'sessions highwater' / 'sessions'
-- statistics, and V$PARAMETER only describes the instance the script is connected
-- to, so both figures are taken from the per-instance resource limit view instead.
-- LIMIT_VALUE is a padded string that may read UNLIMITED, which is mapped to NULL.
SELECT
    I.INST_ID            AS "节点",
    I.HOST_NAME          AS "主机",
    R.SESSIONS_CURRENT   AS "当前会话",
    R.SESSIONS_HIGHWATER AS "历史峰值会话",
    ROUND(R.SESSIONS_CURRENT / NULLIF(R.SESSIONS_MAX, 0) * 100, 1) AS "会话使用率%",
    R.PROCESSES_CURRENT  AS "当前进程数",
    R.PROCESSES_MAX      AS "进程上限"
FROM GV$INSTANCE I
JOIN (
    -- Pivot the two resource rows of each instance into one row
    SELECT
        INST_ID,
        MAX(CASE WHEN RESOURCE_NAME = 'sessions'  THEN CURRENT_UTILIZATION END) AS SESSIONS_CURRENT,
        MAX(CASE WHEN RESOURCE_NAME = 'sessions'  THEN MAX_UTILIZATION END)     AS SESSIONS_HIGHWATER,
        MAX(CASE WHEN RESOURCE_NAME = 'sessions'  THEN LIMIT_NUM END)           AS SESSIONS_MAX,
        MAX(CASE WHEN RESOURCE_NAME = 'processes' THEN CURRENT_UTILIZATION END) AS PROCESSES_CURRENT,
        MAX(CASE WHEN RESOURCE_NAME = 'processes' THEN LIMIT_NUM END)           AS PROCESSES_MAX
    FROM (
        SELECT
            INST_ID,
            RESOURCE_NAME,
            CURRENT_UTILIZATION,
            MAX_UTILIZATION,
            CASE
                WHEN TRIM(LIMIT_VALUE) = 'UNLIMITED' THEN NULL
                ELSE TO_NUMBER(TRIM(LIMIT_VALUE))
            END AS LIMIT_NUM
        FROM GV$RESOURCE_LIMIT
        WHERE RESOURCE_NAME IN ('sessions', 'processes')
    )
    GROUP BY INST_ID
) R ON I.INST_ID = R.INST_ID
ORDER BY I.INST_ID;
-- ============================================================
-- [3] Private interconnect
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 3. Interconnect 网络状态 │
PROMPT └─────────────────────────────────────┘
-- 3.1 Interfaces listed by each instance in GV$CLUSTER_INTERCONNECTS
SELECT ci.INST_ID    AS "节点"
     , ci.NAME       AS "接口名"
     , ci.IP_ADDRESS AS "IP地址"
     , ci.IS_PUBLIC  AS "是否Public"
     , ci.SOURCE     AS "来源"
  FROM GV$CLUSTER_INTERCONNECTS ci
 ORDER BY ci.INST_ID;
-- 3.2 Interconnect message counters
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === Interconnect 流量统计 ===
-- GV$SYSSTAT exposes one NAME / VALUE row per statistic, not one column per
-- statistic, so the cumulative GCS and GES message counters are pivoted by name.
SELECT
    INST_ID AS "节点",
    SUM(CASE WHEN NAME = 'gcs messages sent' THEN VALUE ELSE 0 END) AS "GCS发送次数",
    SUM(CASE WHEN NAME = 'ges messages sent' THEN VALUE ELSE 0 END) AS "GES发送次数"
FROM GV$SYSSTAT
WHERE NAME IN ('gcs messages sent', 'ges messages sent')
GROUP BY INST_ID
ORDER BY INST_ID;
-- Global cache blocks served and received per instance, pivoted from GV$SYSSTAT.
WITH gc_stats AS (
    SELECT ss.INST_ID
         , ss.NAME
         , ss.VALUE
      FROM GV$SYSSTAT ss
     WHERE ss.NAME IN ( 'gc cr blocks served'
                      , 'gc current blocks served'
                      , 'gc cr blocks received'
                      , 'gc current blocks received' )
)
SELECT g.INST_ID AS "节点"
     , ROUND(SUM(CASE WHEN g.NAME = 'gc cr blocks served'        THEN g.VALUE ELSE 0 END)) AS "CR块发送"
     , ROUND(SUM(CASE WHEN g.NAME = 'gc current blocks served'   THEN g.VALUE ELSE 0 END)) AS "Current块发送"
     , ROUND(SUM(CASE WHEN g.NAME = 'gc cr blocks received'      THEN g.VALUE ELSE 0 END)) AS "CR块接收"
     , ROUND(SUM(CASE WHEN g.NAME = 'gc current blocks received' THEN g.VALUE ELSE 0 END)) AS "Current块接收"
  FROM gc_stats g
 GROUP BY g.INST_ID
 ORDER BY g.INST_ID;
-- ============================================================
-- [4] ASM disk group status
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 4. ASM 磁盘组状态 │
PROMPT └─────────────────────────────────────┘
-- Capacity and health of every disk group visible in V$ASM_DISKGROUP.
-- TOTAL_MB can be 0 (for example for a disk group that is not mounted), so the
-- divisor is wrapped in NULLIF and the usage shows as NULL instead of raising
-- ORA-01476 and losing the whole result.
SELECT
    GROUP_NUMBER       AS "组号",
    NAME               AS "磁盘组",
    TYPE               AS "冗余类型",
    STATE              AS "状态",
    TOTAL_MB           AS "总容量MB",
    FREE_MB            AS "剩余MB",
    TOTAL_MB - FREE_MB AS "已用MB",
    ROUND((1 - FREE_MB / NULLIF(TOTAL_MB, 0)) * 100, 1) AS "使用率%",
    OFFLINE_DISKS      AS "离线磁盘数",
    VOTING_FILES       AS "投票文件"
FROM V$ASM_DISKGROUP
ORDER BY NAME;
-- ASM disk detail, restricted to disks that look unhealthy
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === ASM 磁盘明细(异常状态) ===
-- A disk is listed when its state is not NORMAL, its mode is not ONLINE,
-- or it has recorded at least one read or write error.
SELECT
    GROUP_NUMBER AS "组号",
    DISK_NUMBER  AS "盘号",
    NAME         AS "磁盘名",
    PATH         AS "路径",
    STATE        AS "状态",
    MODE_STATUS  AS "模式",
    TOTAL_MB     AS "容量MB",
    FREE_MB      AS "剩余MB",
    READS        AS "读次数",
    WRITES       AS "写次数",
    READ_ERRS    AS "读错误",
    WRITE_ERRS   AS "写错误"
FROM V$ASM_DISK
WHERE STATE != 'NORMAL'
   OR MODE_STATUS != 'ONLINE'
   OR READ_ERRS > 0
   OR WRITE_ERRS > 0
ORDER BY GROUP_NUMBER, DISK_NUMBER;
-- ============================================================
-- [5] Global cache performance (GCS / GES)
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 5. 全局缓存 GCS/GES 性能 │
PROMPT └─────────────────────────────────────┘
-- Average global cache block receive latency per instance
-- (values above 10 ms deserve attention).
-- GV$SYSSTAT exposes one NAME / VALUE row per statistic, not one column per
-- statistic, so the four counters are pivoted by name first. The receive-time
-- statistics are kept in centiseconds, hence the factor 10 to get milliseconds.
-- An instance that has received no blocks shows NULL latency.
SELECT
    INST_ID AS "节点",
    ROUND(CR_TIME_CS * 10 / NULLIF(CR_BLOCKS, 0), 2)   AS "CR块平均延迟ms",
    ROUND(CUR_TIME_CS * 10 / NULLIF(CUR_BLOCKS, 0), 2) AS "Current块平均延迟ms",
    CR_BLOCKS        AS "CR块接收总数",
    CUR_BLOCKS       AS "Current块接收总数",
    CR_TIME_CS * 10  AS "CR总延迟ms",
    CUR_TIME_CS * 10 AS "Current总延迟ms"
FROM (
    SELECT
        INST_ID,
        SUM(CASE WHEN NAME = 'gc cr block receive time'      THEN VALUE ELSE 0 END) AS CR_TIME_CS,
        SUM(CASE WHEN NAME = 'gc cr blocks received'         THEN VALUE ELSE 0 END) AS CR_BLOCKS,
        SUM(CASE WHEN NAME = 'gc current block receive time' THEN VALUE ELSE 0 END) AS CUR_TIME_CS,
        SUM(CASE WHEN NAME = 'gc current blocks received'    THEN VALUE ELSE 0 END) AS CUR_BLOCKS
    FROM GV$SYSSTAT
    WHERE NAME IN (
        'gc cr block receive time', 'gc cr blocks received',
        'gc current block receive time', 'gc current blocks received'
    )
    GROUP BY INST_ID
)
ORDER BY INST_ID;
-- Block transfers between instances, from GV$INSTANCE_CACHE_TRANSFER.
-- The view has one row per (INST_ID, INSTANCE, CLASS), where INSTANCE is the
-- instance the blocks came from. It has no AVG_CR_GET_TIME, MSGS_SENT_QUEUED,
-- MSGS_RCVD_QUEUED or DEST_INST columns, so the report uses the block counters
-- the view does provide, summed over all block classes.
-- No row-limiting clause is used, which keeps the statement valid on 11gR2.
SELECT
    INST_ID                AS "节点",
    INSTANCE               AS "来源节点",
    SUM(CR_BLOCK)          AS "CR块接收",
    SUM(CR_BUSY)           AS "CR块Busy",
    SUM(CR_CONGESTED)      AS "CR块Congested",
    SUM(CURRENT_BLOCK)     AS "Current块接收",
    SUM(CURRENT_BUSY)      AS "Current块Busy",
    SUM(CURRENT_CONGESTED) AS "Current块Congested",
    SUM(LOST)              AS "丢失块"
FROM GV$INSTANCE_CACHE_TRANSFER
WHERE INST_ID != INSTANCE
GROUP BY INST_ID, INSTANCE
ORDER BY INST_ID, INSTANCE;
-- ============================================================
-- [6] Top wait events
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 6. TOP 等待事件 │
PROMPT └─────────────────────────────────────┘
-- Ten non-idle events with the largest TIME_WAITED on each instance.
-- TIME_WAITED and AVERAGE_WAIT are divided by 100 for the columns labelled in seconds.
WITH ranked_events AS (
    SELECT se.INST_ID
         , se.EVENT
         , se.TOTAL_WAITS
         , se.TIME_WAITED
         , se.AVERAGE_WAIT
         , se.WAIT_CLASS
         , ROW_NUMBER() OVER ( PARTITION BY se.INST_ID
                               ORDER BY se.TIME_WAITED DESC ) AS RN
      FROM GV$SYSTEM_EVENT se
     WHERE se.WAIT_CLASS != 'Idle'
       AND se.TOTAL_WAITS > 0
)
SELECT re.INST_ID                      AS "节点"
     , re.EVENT                        AS "等待事件"
     , re.TOTAL_WAITS                  AS "等待次数"
     , ROUND(re.TIME_WAITED / 100, 2)  AS "等待时间(秒)"
     , ROUND(re.AVERAGE_WAIT / 100, 4) AS "平均等待(秒)"
     , re.WAIT_CLASS                   AS "等待类型"
  FROM ranked_events re
 WHERE re.RN <= 10
 ORDER BY re.INST_ID, re.RN;
-- ============================================================
-- [7] Active sessions and lock waits
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 7. 活跃会话 & 锁等待 │
PROMPT └─────────────────────────────────────┘
-- 7.1 User session count per instance and session status
SELECT gs.INST_ID AS "节点"
     , gs.STATUS  AS "状态"
     , gs.TYPE    AS "类型"
     , COUNT(*)   AS "会话数"
  FROM GV$SESSION gs
 WHERE gs.TYPE = 'USER'
 GROUP BY gs.INST_ID
        , gs.STATUS
        , gs.TYPE
 ORDER BY gs.INST_ID
        , gs.STATUS;
-- 7.2 Sessions that are waiting right now (top 20 by current wait time)
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === 当前等待会话(TOP 20)===
-- STATE = 'WAITING' keeps only sessions inside a wait. For any other state
-- EVENT and WAIT_TIME_MICRO describe the previous wait, not a current one.
-- The GV$SQL join includes the child number so that a statement with several
-- child cursors does not repeat the session row.
-- ROW_NUMBER is used instead of FETCH FIRST so the statement also runs on 11gR2.
SELECT
    INST_ID                AS "节点",
    SID                    AS "SID",
    SERIAL#                AS "SERIAL#",
    USERNAME               AS "用户",
    PROGRAM                AS "程序",
    STATUS                 AS "状态",
    EVENT                  AS "等待事件",
    WAIT_TIME_MICRO / 1000 AS "等待ms",
    SQL_ID                 AS "SQL_ID",
    SQL_TEXT_HEAD          AS "SQL文本"
FROM (
    SELECT
        S.INST_ID,
        S.SID,
        S.SERIAL#,
        S.USERNAME,
        S.PROGRAM,
        S.STATUS,
        S.EVENT,
        S.WAIT_TIME_MICRO,
        S.SQL_ID,
        SUBSTR(Q.SQL_TEXT, 1, 80) AS SQL_TEXT_HEAD,
        ROW_NUMBER() OVER (ORDER BY S.WAIT_TIME_MICRO DESC) AS RN
    FROM GV$SESSION S
    LEFT JOIN GV$SQL Q
        ON  Q.INST_ID = S.INST_ID
        AND Q.SQL_ID = S.SQL_ID
        AND Q.CHILD_NUMBER = S.SQL_CHILD_NUMBER
    WHERE S.STATUS = 'ACTIVE'
      AND S.TYPE = 'USER'
      AND S.STATE = 'WAITING'
      AND S.EVENT NOT LIKE '%SQL*Net%'
      AND S.EVENT NOT LIKE '%client message%'
)
WHERE RN <= 20
ORDER BY RN;
-- 7.3 Lock waits across the cluster (holder and waiter may be on different nodes)
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === 跨节点锁等待 ===
-- Holders and waiters are paired on the same lock resource (TYPE, ID1, ID2).
-- In RAC a holder that only blocks sessions on another instance reports
-- BLOCK = 2 rather than 1, so both values are accepted. LMODE > 0 makes sure
-- the holder row really holds the lock. Every holder of the resource is listed,
-- so for shared modes a listed holder is a candidate blocker, not a proven one.
SELECT
    LH.INST_ID  AS "持锁节点",
    LH.SID      AS "持锁SID",
    SH.USERNAME AS "持锁用户",
    LW.INST_ID  AS "等待节点",
    LW.SID      AS "等待SID",
    SW.USERNAME AS "等待用户",
    LH.TYPE     AS "锁类型",
    LH.ID1      AS "对象ID1",
    LH.ID2      AS "对象ID2",
    ROUND(SW.SECONDS_IN_WAIT / 60, 1) AS "等待分钟"
FROM GV$LOCK LH
JOIN GV$LOCK LW
    ON  LW.TYPE = LH.TYPE
    AND LW.ID1 = LH.ID1
    AND LW.ID2 = LH.ID2
JOIN GV$SESSION SH
    ON  SH.INST_ID = LH.INST_ID
    AND SH.SID = LH.SID
JOIN GV$SESSION SW
    ON  SW.INST_ID = LW.INST_ID
    AND SW.SID = LW.SID
WHERE LH.BLOCK IN (1, 2)
  AND LH.LMODE > 0
  AND LW.REQUEST > 0
  AND (LH.INST_ID != LW.INST_ID OR LH.SID != LW.SID)
ORDER BY SW.SECONDS_IN_WAIT DESC;
-- ============================================================
-- [8] Archived logs and tablespace space
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 8. 归档日志 & 表空间 │
PROMPT └─────────────────────────────────────┘
-- 8.1 Archived redo volume per redo thread and hour (last 24 hours)
-- Archived log records live in the control file, which all instances share.
-- GV$ARCHIVED_LOG therefore returns the same rows once per instance, and
-- grouping by INST_ID reports the cluster total for every node. The non-global
-- view grouped by THREAD# gives the real per-thread figures.
-- The hour is formatted explicitly because the default date format hides it.
-- The 24-hour filter already bounds the result, so no row limit is applied.
-- NOTE(review): DEST_ID = 1 assumes the first archive destination is the
-- primary local one; confirm against log_archive_dest_n on this cluster.
SELECT
    THREAD# AS "线程号",
    TO_CHAR(TRUNC(FIRST_TIME, 'HH'), 'YYYY-MM-DD HH24') AS "小时",
    COUNT(*) AS "归档数",
    ROUND(SUM(BLOCKS * BLOCK_SIZE) / 1024 / 1024, 1) AS "大小MB"
FROM V$ARCHIVED_LOG
WHERE FIRST_TIME >= SYSDATE - 1
  AND STANDBY_DEST = 'NO'
  AND DEST_ID = 1
GROUP BY THREAD#, TRUNC(FIRST_TIME, 'HH')
ORDER BY THREAD#, TRUNC(FIRST_TIME, 'HH') DESC;
-- 8.2 Redo log switches per thread and hour (more than 5 per hour deserves attention)
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === Redo 日志切换频率(最近12小时)===
-- Log history is control-file data shared by all instances, so GV$LOG_HISTORY
-- repeats every row once per instance. The non-global view grouped by THREAD#
-- counts each switch exactly once.
SELECT
    THREAD# AS "线程号",
    TO_CHAR(FIRST_TIME, 'YYYY-MM-DD HH24') AS "小时",
    COUNT(*) AS "切换次数"
FROM V$LOG_HISTORY
WHERE FIRST_TIME >= SYSDATE - 0.5
GROUP BY THREAD#, TO_CHAR(FIRST_TIME, 'YYYY-MM-DD HH24')
ORDER BY THREAD#, TO_CHAR(FIRST_TIME, 'YYYY-MM-DD HH24') DESC;
-- 8.3 Tablespace usage (warning from 80%, critical from 90%)
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === 表空间使用率 ===
-- A tablespace with no free extent has no row in DBA_FREE_SPACE, so the outer
-- join yields NULL free space. Without NVL the usage would be NULL and a
-- completely full tablespace would fall through to the normal status.
-- Sizes are current datafile sizes; autoextend headroom is not considered.
SELECT
    TABLESPACE_NAME              AS "表空间",
    CONTENTS                     AS "类型",
    ROUND(TOTAL_MB, 0)           AS "总大小MB",
    ROUND(TOTAL_MB - FREE_MB, 0) AS "已用MB",
    ROUND(FREE_MB, 0)            AS "剩余MB",
    ROUND(USED_PCT, 1)           AS "使用率%",
    CASE
        WHEN USED_PCT >= 90 THEN '🔴 严重'
        WHEN USED_PCT >= 80 THEN '🟡 警告'
        ELSE '🟢 正常'
    END AS "状态"
FROM (
    SELECT
        T.TABLESPACE_NAME,
        T.CONTENTS,
        D.TOTAL_MB,
        NVL(F.FREE_MB, 0) AS FREE_MB,
        (1 - NVL(F.FREE_MB, 0) / NULLIF(D.TOTAL_MB, 0)) * 100 AS USED_PCT
    FROM DBA_TABLESPACES T
    JOIN (
        SELECT TABLESPACE_NAME, SUM(BYTES) / 1024 / 1024 AS TOTAL_MB
        FROM DBA_DATA_FILES
        GROUP BY TABLESPACE_NAME
    ) D ON T.TABLESPACE_NAME = D.TABLESPACE_NAME
    LEFT JOIN (
        SELECT TABLESPACE_NAME, SUM(BYTES) / 1024 / 1024 AS FREE_MB
        FROM DBA_FREE_SPACE
        GROUP BY TABLESPACE_NAME
    ) F ON T.TABLESPACE_NAME = F.TABLESPACE_NAME
)
ORDER BY USED_PCT DESC;
-- ============================================================
-- [9] Undo tablespace usage
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 9. UNDO 表空间状态 │
PROMPT └─────────────────────────────────────┘
-- Most recent GV$UNDOSTAT interval of each instance.
-- UNDOBLKS is a block count. It is converted to MB with the block size of the
-- undo tablespace named by the instance's undo_tablespace parameter instead of
-- an assumed 8 KB. If the tablespace cannot be matched the MB column is NULL.
SELECT
    U.INST_ID        AS "节点",
    P.VALUE          AS "UNDO表空间",
    ROUND(U.UNDOBLKS * T.BLOCK_SIZE / 1024 / 1024, 2) AS "活跃UNDO_MB",
    U.TXNCOUNT       AS "活跃事务数",
    U.MAXQUERYLEN    AS "最长查询(秒)",
    U.MAXCONCURRENCY AS "最大并发事务",
    U.SSOLDERRCNT    AS "ORA-1555次数",
    U.NOSPACEERRCNT  AS "空间不足次数"
FROM GV$UNDOSTAT U
JOIN GV$PARAMETER P
    ON  P.INST_ID = U.INST_ID
    AND P.NAME = 'undo_tablespace'
LEFT JOIN DBA_TABLESPACES T
    ON  T.TABLESPACE_NAME = UPPER(P.VALUE)
WHERE U.BEGIN_TIME = (
    SELECT MAX(U2.BEGIN_TIME)
    FROM GV$UNDOSTAT U2
    WHERE U2.INST_ID = U.INST_ID
)
ORDER BY U.INST_ID;
-- ============================================================
-- [10] Top SQL (cluster-wide)
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 10. TOP SQL (全局) │
PROMPT └─────────────────────────────────────┘
-- Ranked by CPU time
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === TOP 10 高CPU SQL ===
-- Ten cursors with the highest CPU_TIME across all instances.
-- CPU_TIME and ELAPSED_TIME are divided by 1,000,000 for the columns labelled
-- in seconds. PARSING_USER_ID > 0 leaves out statements parsed by user id 0.
-- ROW_NUMBER is used instead of FETCH FIRST so the statement also runs on 11gR2.
SELECT
    SQL_ID                                      AS "SQL_ID",
    INST_ID                                     AS "节点",
    ROUND(CPU_TIME / 1000000, 2)                AS "CPU时间(秒)",
    ROUND(ELAPSED_TIME / 1000000, 2)            AS "总耗时(秒)",
    EXECUTIONS                                  AS "执行次数",
    ROUND(CPU_TIME / EXECUTIONS / 1000000, 4)   AS "每次CPU(秒)",
    BUFFER_GETS                                 AS "逻辑读",
    DISK_READS                                  AS "物理读",
    ROWS_PROCESSED                              AS "返回行数",
    SUBSTR(SQL_TEXT, 1, 100)                    AS "SQL摘要"
FROM (
    SELECT
        SQL_ID,
        INST_ID,
        CPU_TIME,
        ELAPSED_TIME,
        EXECUTIONS,
        BUFFER_GETS,
        DISK_READS,
        ROWS_PROCESSED,
        SQL_TEXT,
        ROW_NUMBER() OVER (ORDER BY CPU_TIME DESC) AS RN
    FROM GV$SQL
    WHERE EXECUTIONS > 0
      AND PARSING_USER_ID > 0
)
WHERE RN <= 10
ORDER BY RN;
-- Ranked by cluster (global cache) wait time, the RAC-specific view of top SQL
-- The heading must not end with a hyphen: SQL*Plus treats a trailing hyphen as a
-- line-continuation mark and would append the following SELECT line to the PROMPT.
PROMPT === TOP 10 Global Cache等待 SQL ===
-- Ten cursors with the highest CLUSTER_WAIT_TIME across all instances.
-- The two time columns are shown as stored, without unit conversion.
-- ROW_NUMBER is used instead of FETCH FIRST so the statement also runs on 11gR2.
SELECT
    SQL_ID            AS "SQL_ID",
    INST_ID           AS "节点",
    CLUSTER_WAIT_TIME AS "集群等待时间",
    ELAPSED_TIME      AS "总耗时",
    ROUND(CLUSTER_WAIT_TIME / NULLIF(ELAPSED_TIME, 0) * 100, 1) AS "集群等待占比%",
    EXECUTIONS        AS "执行次数",
    SUBSTR(SQL_TEXT, 1, 100) AS "SQL摘要"
FROM (
    SELECT
        SQL_ID,
        INST_ID,
        CLUSTER_WAIT_TIME,
        ELAPSED_TIME,
        EXECUTIONS,
        SQL_TEXT,
        ROW_NUMBER() OVER (ORDER BY CLUSTER_WAIT_TIME DESC) AS RN
    FROM GV$SQL
    WHERE CLUSTER_WAIT_TIME > 0
      AND EXECUTIONS > 0
)
WHERE RN <= 10
ORDER BY RN;
-- ============================================================
-- [11] Voting disk and OCR status (must be run at the OS level)
-- ============================================================
-- This section runs no SQL. It only prints the clusterware commands that the
-- operator is expected to run from an OS shell.
PROMPT
PROMPT ┌──────────────────────────────────────────────────────┐
PROMPT │ 11. Voting Disk / OCR 检查命令(在OS层执行) │
PROMPT └──────────────────────────────────────────────────────┘
PROMPT
PROMPT -- 检查投票盘
PROMPT $ crsctl query css votedisk
PROMPT
PROMPT -- 检查 OCR
PROMPT $ ocrcheck
PROMPT
PROMPT -- 检查集群资源整体状态
PROMPT $ crsctl status res -t
PROMPT
PROMPT -- 检查节点状态
PROMPT $ olsnodes -s -t
-- ============================================================
-- [12] Alert summary
-- ============================================================
PROMPT
PROMPT ┌─────────────────────────────────────┐
PROMPT │ 12. 告警汇总 │
PROMPT └─────────────────────────────────────┘
-- One row per triggered alert. Nothing is printed when no check fires.
-- The usage thresholds are written without a division in the WHERE clause so
-- that a zero total size cannot raise ORA-01476 and hide every other alert.
SELECT '【告警】' || ITEM AS "巡检告警项"
FROM (
    -- Instance not OPEN
    SELECT '节点' || INST_ID || '实例状态异常: ' || STATUS AS ITEM
    FROM GV$INSTANCE
    WHERE STATUS != 'OPEN'
    UNION ALL
    -- ASM disk group more than 80% used
    SELECT 'ASM磁盘组[' || NAME || ']使用率: ' ||
           ROUND((1 - FREE_MB / NULLIF(TOTAL_MB, 0)) * 100, 1) || '%' AS ITEM
    FROM V$ASM_DISKGROUP
    WHERE TOTAL_MB > 0
      AND FREE_MB < 0.2 * TOTAL_MB
    UNION ALL
    -- Tablespace more than 85% used (no DBA_FREE_SPACE row means no free space)
    SELECT '表空间[' || T.TABLESPACE_NAME || ']使用率: ' ||
           ROUND((1 - NVL(F.FREE_MB, 0) / NULLIF(D.TOTAL_MB, 0)) * 100, 1) || '%' AS ITEM
    FROM DBA_TABLESPACES T
    JOIN (
        SELECT TABLESPACE_NAME, SUM(BYTES) / 1024 / 1024 AS TOTAL_MB
        FROM DBA_DATA_FILES
        GROUP BY TABLESPACE_NAME
    ) D ON T.TABLESPACE_NAME = D.TABLESPACE_NAME
    LEFT JOIN (
        SELECT TABLESPACE_NAME, SUM(BYTES) / 1024 / 1024 AS FREE_MB
        FROM DBA_FREE_SPACE
        GROUP BY TABLESPACE_NAME
    ) F ON T.TABLESPACE_NAME = F.TABLESPACE_NAME
    WHERE D.TOTAL_MB > 0
      AND NVL(F.FREE_MB, 0) < 0.15 * D.TOTAL_MB
    UNION ALL
    -- ORA-1555 errors summed over every undo interval of the last hour,
    -- not just the newest interval
    SELECT '节点' || INST_ID || ' ORA-1555发生' || ERR_CNT || '次' AS ITEM
    FROM (
        SELECT INST_ID, SUM(SSOLDERRCNT) AS ERR_CNT
        FROM GV$UNDOSTAT
        WHERE BEGIN_TIME >= SYSDATE - 1/24
        GROUP BY INST_ID
        HAVING SUM(SSOLDERRCNT) > 0
    )
    UNION ALL
    -- More than 10 redo switches within the last hour, counted per redo thread.
    -- Log history is shared control-file data, so the global view would repeat
    -- every switch once per instance.
    SELECT '线程' || THREAD# || ' Redo切换过频: ' || CNT || '次/小时' AS ITEM
    FROM (
        SELECT THREAD#, COUNT(*) AS CNT
        FROM V$LOG_HISTORY
        WHERE FIRST_TIME >= SYSDATE - 1/24
        GROUP BY THREAD#
        HAVING COUNT(*) > 10
    )
);
PROMPT
PROMPT ===== 巡检完成 =====
PROMPT