在数据库启停、主备切换、升级、网络改造等场景下,需要对数据库做一些基本检查,确认当前运行是否正常。本脚本力求简单,一键完成这些检查。
bash
#!/bin/bash
## Checklist for this script: instance count, alert log, instance status,
## sessions, active sessions, locks, cluster status, service status,
## disk space, listener log.
## linux
# Three samples of system activity, two seconds apart.
vmstat 2 3
# Optional: processes ordered by memory share
# ps -o pid,user,%mem,size,command ax | sort -b -k3 -r

## Warn when more than one database instance runs on this host.
## Instances are recognised by their checkpoint background process, whose name
## looks like ora_ckpt_ZHCX2. Matching on the process name itself (instead of
## grepping the whole line for "ckpt") keeps unrelated commands out of the list;
## lines mentioning ASM or MGMT are still excluded.
ckpt_procs=$(ps xao pid,user,cmd | awk '$3 ~ /^ora_ckpt_/ && $0 !~ /ASM|MGMT/ {print $3}')
inst_cnt=$(printf '%s\n' "$ckpt_procs" | grep -c .)
if [ "$inst_cnt" -gt 1 ]; then
    # Strip only the fixed prefix so that SIDs containing "_" are shown in full.
    printf '%s\n' "$ckpt_procs" | sed 's/^ora_ckpt_/INSTANCE: /' | sort
fi

## List filesystems whose usage is 80% or higher, highest first.
## -P keeps every filesystem on a single line even when the device name is long,
## so the 5th and 6th columns are always usage and mount point.
df -hP | awk 'NR > 1 { pct = $5; sub(/%$/, "", pct); if (pct + 0 >= 80) print $5, $6 }' | sort -t '%' -k1,1nr
## Cluster and service status checks, executed as the grid user.
## The commands are written to a temporary script that is handed to a login
## shell via su. mktemp is used instead of a fixed name under /tmp so that a
## pre-existing file or symlink at a predictable path cannot be overwritten,
## and the script is removed once it has run.
if CMDFILE=$(mktemp /tmp/grid_check.XXXXXX); then
    # The delimiter is quoted: the text below is written out verbatim.
    cat > "$CMDFILE" << 'EOF'
#!/bin/bash
source /home/grid/.bash_profile
#集群状态 服务状态 侦听日志(待做)
#crsctl stat res -t -init -w "(STATE = OFFLINE) and (NAME != ora.crf) and (NAME != ora.diskmon) and (NAME != ora.cha)"|grep -v '\----'|grep -v Cluster
crsctl stat res -t -init -w "(STATE = OFFLINE) and (NAME != ora.crf) and (NAME != ora.diskmon) and (NAME != ora.cha)"
crsctl stat res -t -w "(STATE = OFFLINE) and (NAME != ora.proxy_advm)" |grep -v '\-------'|grep -v 'Cluster Resources'
crsctl stat res -t -w "NAME co srv" |grep -v '\-------'|grep -v 'Cluster Resources' #如果配置了service服务
EOF
    # mktemp creates the file readable by its owner only; grid must read it.
    chmod 644 "$CMDFILE"
    su - grid -s /bin/bash "$CMDFILE"
    rm -f "$CMDFILE"
else
    echo "grid checks skipped: could not create a temporary script" >&2
fi
## Show the latest ORA- messages from the alert log of every database instance.
## Instance names are taken from the checkpoint background processes
## (ora_ckpt_<SID>); only the fixed prefix is stripped so SIDs containing "_"
## survive intact.
## NOTE(review): adrci is invoked as the current user; this assumes adrci is on
## PATH with a usable Oracle environment - confirm.
adr_homes=$(adrci exec="show home")
for sid in $(ps xao pid,user,cmd | awk '$3 ~ /^ora_ckpt_/ && $0 !~ /ASM|MGMT/ {print $3}' | sed 's/^ora_ckpt_//' | sort)
do
    # Prefer the ADR home whose last path component is exactly this SID ...
    adr_home=$(printf '%s\n' "$adr_homes" | awk -v sid="$sid" '{ n = split($1, part, "/"); if (part[n] == sid) { print $1; exit } }')
    # ... and fall back to the first home that merely contains the SID.
    if [ -z "$adr_home" ]; then
        adr_home=$(printf '%s\n' "$adr_homes" | grep -F -- "$sid" | head -n 1)
    fi
    if [ -z "$adr_home" ]; then
        echo "No ADR home found for instance $sid" >&2
        continue
    fi
    adrci exec="set home $adr_home;show alert -p \"message_text like '%ORA-%'\" -term " | tail -10
done
# 11g alternative:
# adrci exec="set home $adr_home;show alert -tail 5000"|grep ORA |tail -10
## Database checks, executed as the oracle user through SQL*Plus:
## instance status, session counts, top SQL from the last 15 minutes of ASH,
## active non-idle sessions, tablespace usage, blocking-session tree and
## Data Guard lag (one query meant for the primary, one for the standby).
##
## The commands are written to a temporary script that is handed to a login
## shell via su. mktemp is used instead of a fixed name under /tmp so that a
## pre-existing file or symlink at a predictable path cannot be overwritten,
## and the script is removed once it has run.
##
## Both heredoc delimiters are quoted, so the text is passed through verbatim
## and the "$" in view names such as v$database needs no escaping.
## Comments inside the SQL*Plus input use "--": "#" is the default SQLPREFIX
## character there, so "##..." lines are not treated as comments.
if CMDFILE=$(mktemp /tmp/oracle_check.XXXXXX); then
    cat > "$CMDFILE" << 'EOF'
#!/bin/bash
source /home/oracle/.bash_profile
##实例状态
sqlplus -S '/ as sysdba' <<'!'
set lines 120
col status for a12
col instance_name for a15
col startup_time for a20
col db_role for a20
col host_name for a25
select instance_name,status,to_char(startup_time,'yyyy-mm-dd hh24:mi:ss')startup_time,host_name,(select database_role from v$database)db_role from gv$instance order by 1;
-- 会话个数
col status for a12
col username for a30
select inst_id,username,count(0) cnt from gv$session group by inst_id,username order by 1,2;
--top 10 活动会话
set lines 200 pages 100
col txt for a65
col sql_id for a13
select a.sql_id,a.cnt,a.pctload,b.sql_text txt from (select * from (select sql_id,count(0) cnt,round(count(0)/sum(count(0)) over(),4)*100 pctload
from gv$active_session_history A
where A.SAMPLE_TIME>sysdate-15/60/24
and sql_id is not null GROUP BY SQL_ID ORDER BY COUNT(0) DESC)
where rownum<11) a left join (select distinct sql_text,sql_id from v$sqltext where piece=0) b on a.sql_id=b.sql_id order by 2 desc ,1;
col state for a20
col event for a25 trunc
select inst_id inst,sid,sql_id,event,state,blocking_session blk,last_call_et,seconds_in_wait miao
from gv$session where status='ACTIVE' and username is not null and sid<>sys_context('userenv','sid') and wait_class<>'Idle'
order by last_call_et;
-- 表空间使用率(简洁版)
col tablespace_name for a20
select a.tablespace_name, round(a.bytes / 1024 / 1024) "Sum MB", round((a.bytes - b.bytes) / 1024 / 1024) "used MB", round(b.bytes / 1024 / 1024) "free MB", round(((a.bytes - b.bytes) / a.bytes) * 100, 2) "percent_used" from (select tablespace_name, sum(bytes) bytes from dba_data_files group by tablespace_name) a, (select tablespace_name, sum(bytes) bytes, max(bytes) largest from dba_free_space group by tablespace_name) b where a.tablespace_name = b.tablespace_name order by ((a.bytes - b.bytes) / a.bytes) desc;
-- 查被阻塞会话
set lin 200 pages 1000
col USERNAME for a15
col PROGRAM for a40
col EVENT for a30
col WAITING_SESSION for a20
WITH tkf_block_info AS
(SELECT a.inst_id || '_' || a.sid waiting_session,
a.username, a.program, a.event, a.sql_id, a.last_call_et,
DECODE(a.blocking_instance || '_' || a.blocking_session,
'_', NULL, a.blocking_instance || '_' || a.blocking_session) holding_session
FROM gv$session a,
(SELECT inst_id, sid
FROM gv$session
WHERE blocking_session IS NOT NULL
UNION
SELECT blocking_instance, blocking_session
FROM gv$session
WHERE blocking_session IS NOT NULL) b
WHERE a.inst_id = b.inst_id
AND a.SID = b.sid)
SELECT LPAD(' ', 3 * (LEVEL - 1)) || waiting_session waiting_session,
username, program, event, sql_id, last_call_et
FROM tkf_block_info
CONNECT BY PRIOR waiting_session = holding_session
START WITH holding_session IS NULL;
-- ADG延时
--dg差异(在主库执行)
col OPEN_MODE for a20
col PROTECTION_MODE for a20
col DATABASE_ROLE for a18
col SWITCHOVER_STATUS for a20
col thread# for 99
col name for a10
col diff for 9999
set lin 200
select A.THREAD#,C.NAME,C.OPEN_MODE,C.PROTECTION_MODE,C.DATABASE_ROLE,C.SWITCHOVER_STATUS,A.APPLOG,B.NOWLOG, A.APPLOG- B.NOWLOG DIFF from (SELECT THREAD#, MAX(SEQUENCE#) AS "APPLOG" FROM v$ARCHIVED_LOG WHERE APPLIED='YES' and RESETLOGS_CHANGE#=(select RESETLOGS_CHANGE# from v$database) GROUP BY THREAD#) A,(SELECT THREAD#, MAX(SEQUENCE#) AS "NOWLOG" FROM v$LOG GROUP BY THREAD#) B,v$database C where A.THREAD#=B.THREAD#;
--DG应用延时检查(在备库执行)
set lin 150
col name for a23
col VALUE for a18
col UNIT for a30
col TIME_COMPUTED for a20
col DATUM_TIME for a20
col SOURCE_DBID for 99999999999
col SOURCE_DB_UNIQUE_NAME for a20
select name,value, TIME_COMPUTED,DATUM_TIME from v$dataguard_stats;
exit
!
EOF
    # mktemp creates the file readable by its owner only; oracle must read it.
    chmod 644 "$CMDFILE"
    su - oracle -s /bin/bash "$CMDFILE"
    rm -f "$CMDFILE"
else
    echo "oracle checks skipped: could not create a temporary script" >&2
fi
下一步可以将这些检查拆分为 Ansible 任务,在多台主机上批量执行,效率会更高。