背景:
1. 生产环境中某个应用的 CPU 偶发瞬时冲高,每次持续 2-5 分钟,运维人员来不及立即定位原因。
2. 该应用以 pod 方式部署,尚未接入集成监控系统,需要自行实现监测。
3. pod 中不允许安装 crontab 工具,因此无法使用定时任务,只能让脚本自身循环轮询。
脚本内容如下:
#!/bin/bash
#
# CPU surge watcher for Java applications, for environments without cron.
#
# Polls the CPU figure reported by `top` in an endless loop. When it exceeds
# a threshold, the busiest Java processes are located and, for each of them,
# the ids of their busiest threads (formatted as hexadecimal `nid=0x...`
# values) plus a `jstack -l` dump are appended to a per-process log file.
#
# Optional environment overrides:
#   CPU_THRESHOLD   trigger level for the CPU figure (default: 60)
#   CHECK_INTERVAL  argument passed to `sleep` between polls (default: 10s)
#   LOG_DIR         directory receiving the log files (default: .)
#
# No `set -e`: a transient failure of top/jstack must not end the monitor.

readonly CPU_THRESHOLD="${CPU_THRESHOLD:-60}"
readonly CHECK_INTERVAL="${CHECK_INTERVAL:-10s}"
readonly LOG_DIR="${LOG_DIR:-.}"
readonly MAX_PROCESSES=5
readonly NUMBER_RE='^[0-9]+([.][0-9]+)?$'

warn() { printf '%s\n' "$*" >&2; }
die() { warn "$*"; exit 1; }

#######################################
# Print field 2 of the third line of one batch-mode `top` snapshot.
# NOTE(review): assumes the procps layout where line 3 is the "%Cpu(s)"
# summary and field 2 is the user-space percentage - confirm on the image.
# Outputs: the raw field on stdout (empty if top produced no third line)
#######################################
read_total_cpu() {
  top -bn 1 | awk 'NR == 3 { print $2 }'
}

#######################################
# Numeric "greater than" test that also works for fractional values.
# `expr a \> b` falls back to string comparison when an operand is not an
# integer, so e.g. "7.5" would wrongly compare greater than "60".
# Arguments: $1 - measured value, $2 - limit
# Returns:   0 if $1 is a number and $1 > $2, 1 otherwise
#######################################
cpu_exceeds() {
  local value=$1 limit=$2
  [[ "$value" =~ $NUMBER_RE ]] || return 1
  awk -v v="$value" -v l="$limit" 'BEGIN { exit !(v + 0 > l + 0) }'
}

#######################################
# Record thread ids and a jstack dump for one process.
# Globals:   LOG_DIR (read)
# Arguments: $1 - process id, $2 - CPU figure of that process (for the log line)
# Outputs:   one status line on stdout; appends to
#            "$LOG_DIR/<pid>_<timestamp>.log"
#######################################
dump_process() {
  local pid=$1 pcpu=$2
  local -a tids=()
  local tid file

  # Lines 8-12 of the thread view are the first five rows below the header
  # (assuming the default 7-line header); keep only numeric ids.
  mapfile -t tids < <(
    top -H -bn 1 -p "$pid" \
      | awk 'NR >= 8 && NR <= 12 && $1 ~ /^[0-9]+$/ { print $1 }'
  )

  # The pid is part of the name so that two processes handled within the
  # same second do not write into the same file.
  file="${LOG_DIR}/${pid}_$(date '+%Y%m%d%H%M%S').log"
  echo "print process ${pid} ${tids[*]}, cpu=${pcpu}, file=${file}"

  {
    # jstack prints native thread ids as hex "nid=0x..."; emit the same
    # form so the ids can be searched for directly in the dump below.
    for tid in "${tids[@]}"; do
      printf 'nid=0x%x\n' "$tid"
    done
    jstack -l "$pid"
  } >>"$file" 2>&1 || warn "failed to write thread dump of pid ${pid} to ${file}"
}

#######################################
# Find up to MAX_PROCESSES `top` rows containing "java" (in top's output
# order) and dump each of them.
# Outputs: the collected "pid cpu" pairs on one line, then per-process lines
#######################################
handle_surge() {
  local -a procs=()
  local entry pid pcpu

  mapfile -t procs < <(
    top -bn 1 \
      | awk -v max="$MAX_PROCESSES" '/java/ && n < max { print $1, $9; n++ }'
  )
  echo "${procs[*]}"

  for entry in "${procs[@]}"; do
    read -r pid pcpu <<<"$entry"
    # Skip rows whose first column is not a pid (e.g. a header line that
    # happens to contain "java").
    [[ "$pid" =~ ^[0-9]+$ ]] || continue
    dump_process "$pid" "$pcpu"
  done
}

main() {
  local cpu

  command -v top >/dev/null || die "top not found in PATH"
  [[ "$CPU_THRESHOLD" =~ $NUMBER_RE ]] \
    || die "CPU_THRESHOLD must be a number, got '${CPU_THRESHOLD}'"
  mkdir -p -- "$LOG_DIR" || die "cannot create log directory ${LOG_DIR}"

  while true; do
    cpu=$(read_total_cpu)
    echo "$cpu"
    if cpu_exceeds "$cpu" "$CPU_THRESHOLD"; then
      echo "cpu surge, find process, cpu=$cpu"
      handle_surge
    fi
    sleep "$CHECK_INTERVAL"
  done
}

main "$@"