系统信息
主机名、操作系统、内核版本、物理机/虚拟机信息可通过以下命令获取:
hostnamectl
uname -r
systemd-detect-virt
运行时间
读取 /proc/uptime 并计算运行天数和小时数:
awk '{printf "%d days %d hours\n", $1/86400, ($1%86400)/3600}' /proc/uptime # whole days plus the remaining whole hours
CPU 信息
获取逻辑核心数和当前使用率:
nproc
top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1"%"}'
内存信息
查看内存总量、已用(free 的 used 列,不含 buff/cache)和使用率百分比:
free -m | awk '/Mem/{printf "%.1fG/%.1fG (%.1f%%)\n", $3/1024, $2/1024, $3/$2*100}'
磁盘信息
排除 tmpfs、devtmpfs 等虚拟文件系统,列出各挂载点的使用率(分级告警规则见下文“磁盘告警规则”):
df -h --exclude=tmpfs --exclude=devtmpfs | awk 'NR>1 {print $1,$5,$6}' | grep -vE '^udev|^tmpfs'
网络信息
列出所有接口名称及 IPv4 地址:
ip -4 -o addr show | awk '{split($4, a, "/"); print a[1] "\t" $2}' # one "IP<TAB>interface" line per address
进程信息
按 RSS 排序显示内存占用 Top 5:
ps -eo pid,user,%mem,rss,comm --sort=-rss | head -n 6
关键服务
检查 sshd、crond、network、firewalld 运行状态(这些是 RHEL/CentOS 上的服务名;Debian/Ubuntu 上 SSH 和 cron 对应的服务名为 ssh、cron):
systemctl is-active sshd crond network firewalld
系统更新
获取可更新包数量:
apt list --upgradable 2>/dev/null | grep -vc '^Listing' # Debian/Ubuntu (header line excluded)
yum check-update --quiet 2>/dev/null | grep -vc '^$' # RHEL/CentOS (blank lines excluded)
dnf check-update --quiet 2>/dev/null | grep -vc '^$' # Fedora (blank lines excluded)
错误日志
显示最近 5 条错误日志:
journalctl -p err -n 5 --no-pager
系统负载
查看平均负载:
cat /proc/loadavg | cut -d' ' -f1-3
连接状态
统计 TCP 连接:
ss -s | grep TCP:
僵尸进程
检测并统计 Z 状态进程:
ps -eo stat | grep -c '^Z'
磁盘告警规则
| 使用率 | 级别 | 颜色 |
|---|---|---|
| ≤ 80% | 正常 | --- |
| 80% ~ 90% | 注意 | 黄色 |
| > 90% | 警告 | 红色 |
支持:CentOS 7/8、Ubuntu 18.04/20.04/22.04、Debian 9/10/11
bash
#!/bin/bash
# Linux system inspection script
set -e

# Two renderings of "now": a compact one used in the report file name and a
# human-readable one printed in the report header.
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
REPORT_TIME="$(date '+%Y-%m-%d %H:%M:%S')"
OUTPUT_FILE="/tmp/linux_check_${TIMESTAMP}.txt"

# ---------- Detect the operating system ----------
# Bail out early when /etc/os-release is missing; the ID and VERSION_ID
# variables read below are expected to come from sourcing that file.
if [ ! -f /etc/os-release ]; then
  echo "无法检测操作系统版本"
  exit 1
fi

. /etc/os-release
OS="${ID}"
OS_VERSION="${VERSION_ID}"
# ---------- Generate the report ----------
# Everything printed inside the { ... } group below is redirected into
# $OUTPUT_FILE. The group reads REPORT_TIME, OS and PRETTY_NAME, which the
# script sets before this point.

#######################################
# Print used/total as a percentage with one decimal place.
# Arguments: $1 - used amount, $2 - total amount (same unit as $1)
# Outputs:   the percentage on stdout; "0.0" when total is empty or zero
#######################################
mem_usage_percent() {
  # Values are passed with -v instead of being spliced into the awk program,
  # so an empty value can never turn into an awk syntax error.
  LC_ALL=C awk -v used="${1:-0}" -v total="${2:-0}" 'BEGIN {
    if (total + 0 <= 0) { print "0.0"; exit }
    printf "%.1f\n", (used / total) * 100
  }'
}

{
  echo "Linux系统巡检报告"
  echo "===================="
  echo "巡检时间: $REPORT_TIME"
  echo ""

  # System information
  echo "【系统信息】"
  echo "主机名: $(hostname)"
  echo "操作系统: $PRETTY_NAME"
  echo "内核: $(uname -r)"
  # systemd-detect-virt exits 0 and prints the technology name when
  # virtualization is detected; any failure (including a missing binary)
  # falls through to the bare-metal branch.
  if VIRT_TYPE=$(systemd-detect-virt 2>/dev/null); then
    echo "运行环境: 虚拟机 (${VIRT_TYPE})"
  else
    echo "运行环境: 物理机"
  fi
  echo ""

  # Uptime
  echo "【运行时间】"
  # First field of /proc/uptime is seconds since boot (fractional).
  UPTIME_SECONDS=$(awk '{print int($1)}' /proc/uptime)
  UPTIME_DAYS=$((UPTIME_SECONDS / 86400))
  UPTIME_HOURS=$(((UPTIME_SECONDS % 86400) / 3600))
  echo "已运行: ${UPTIME_DAYS}天 ${UPTIME_HOURS}小时"
  echo ""

  # CPU information
  echo "【CPU信息】"
  CPU_CORES=$(nproc)
  # LC_ALL=C keeps the "Cpu(s)" label and the decimal point stable so the
  # sed expression can extract the idle percentage; usage = 100 - idle.
  CPU_USAGE=$(LC_ALL=C top -bn1 \
    | grep "Cpu(s)" \
    | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" \
    | awk '{print 100 - $1}')
  echo "核心数: $CPU_CORES"
  echo "当前使用率: ${CPU_USAGE}%"
  echo ""

  # Memory information
  echo "【内存信息】"
  # Total and used both come from one `free -b` call (bytes). /proc/meminfo
  # has no "Mem:" line, so it cannot be matched with the same pattern.
  # LC_ALL=C prevents a localized row label from defeating the match.
  MEM_STATS=$(LC_ALL=C free -b | awk '/^Mem:/ {print $2, $3}')
  MEM_TOTAL=${MEM_STATS%% *}
  MEM_USED=${MEM_STATS##* }
  MEM_TOTAL=${MEM_TOTAL:-0}
  MEM_USED=${MEM_USED:-0}
  MEM_USAGE_PERCENT=$(mem_usage_percent "$MEM_USED" "$MEM_TOTAL")
  echo "总内存: $(numfmt --to=iec "$MEM_TOTAL")"
  echo "已用: $(numfmt --to=iec "$MEM_USED") (${MEM_USAGE_PERCENT}%)"
  echo ""

  # Disk information
  echo "【磁盘信息】"
  # -P (POSIX format) keeps every filesystem on a single line; the header row
  # is dropped by tail. Only the Use% and mount-point columns are needed.
  df -hP -x tmpfs -x devtmpfs -x cdrom | tail -n +2 \
    | while read -r _ _ _ _ use_percent mount; do
      use_num=${use_percent%\%}
      if [[ "$use_num" =~ ^[0-9]+$ ]]; then
        if [ "$use_num" -gt 90 ]; then
          echo "$mount: 警告 (${use_percent})"
        elif [ "$use_num" -gt 80 ]; then
          echo "$mount: 注意 (${use_percent})"
        else
          echo "$mount: 正常 (${use_percent})"
        fi
      else
        # Non-numeric usage (e.g. "-"): report it without a severity level.
        echo "$mount: 信息 (${use_percent})"
      fi
    done
  echo ""

  # Network information
  echo "【网络信息】"
  # Interface header lines start with "<index>:"; address lines start with
  # "inet". Print "<iface>:" followed by each address without its prefix.
  ip -4 addr show | awk '
    /^[0-9]+:/   { name = $2; sub(/:$/, "", name); print name ":"; next }
    $1 == "inet" { split($2, addr, "/"); print " " addr[1] }
  '
  echo ""

  # Process information
  echo "【进程信息】"
  echo "内存占用 Top 5 (RSS):"
  # Row 1 is the ps header; rows 2-6 are the five largest RSS consumers.
  ps aux --sort=-rss \
    | awk 'NR > 1 && NR <= 6 {printf " %s: %s (%.1f%%)\n", $11, $6, $4}'
  echo ""

  # Key services
  echo "【关键服务】"
  for service in sshd crond network firewalld; do
    if systemctl is-active --quiet "$service" 2>/dev/null; then
      echo "$service: 运行中"
    else
      echo "$service: 未运行"
    fi
  done
  echo ""

  # Pending updates
  echo "【系统更新】"
  # Counting uses `wc -l` rather than `grep -c`: grep exits 1 on a zero count,
  # which would abort the script when it runs under `set -e`.
  case "$OS" in
    ubuntu | debian)
      UPDATES=$(apt list --upgradable 2>/dev/null | grep -v "^Listing" | wc -l)
      echo "可更新包: $UPDATES 个"
      ;;
    centos | rhel | ol | fedora | rocky | almalinux)
      if command -v dnf >/dev/null 2>&1; then
        UPDATES=$(dnf check-update --quiet 2>/dev/null | grep -v "^$" | wc -l)
      else
        UPDATES=$(yum check-update --quiet 2>/dev/null | grep -v "^$" | wc -l)
      fi
      echo "可更新包: $UPDATES 个"
      ;;
    *)
      echo "未知的包管理器"
      ;;
  esac
  echo ""

  # Error log
  echo "【错误日志】"
  journalctl -p err -n 5 --no-pager 2>/dev/null | while read -r line; do
    printf ' %s\n' "$line"
  done
  echo ""
  echo "报告完成"
} > "$OUTPUT_FILE"
cat "$OUTPUT_FILE"

# ---------- Extra checks (printed separately, not written to the report) ----------

#######################################
# Keep only `ps aux` rows whose STAT column (field 8) starts with "Z".
# A prefix match is required because ps appends modifier characters to the
# state letter (e.g. "Z+", "Zs"), which an exact comparison with "Z" misses.
# Inputs:  `ps aux` formatted rows on stdin
# Outputs: the matching rows on stdout
#######################################
filter_zombies() {
  awk '$8 ~ /^Z/'
}

# System load
echo "【系统负载】"
# LC_ALL=C pins the English "load average:" label that the awk split relies on.
LOAD=$(LC_ALL=C uptime | awk -F'load average:' '{print $2}')
echo "平均负载: $LOAD"

# TCP connection summary
echo "【连接状态】"
ss -s | grep -E "(TCP| estab| closed| syn)"

# Zombie processes
echo "【僵尸进程】"
# Take a single ps snapshot so the reported count always matches the listing.
ZOMBIE_ROWS=$(ps aux | filter_zombies)
if [ -n "$ZOMBIE_ROWS" ]; then
  ZOMBIES=$(printf '%s\n' "$ZOMBIE_ROWS" | wc -l)
  echo "发现 $ZOMBIES 个僵尸进程"
  printf '%s\n' "$ZOMBIE_ROWS"
else
  ZOMBIES=0
  echo "无僵尸进程"
fi
使用方式
bash
# 赋予执行权限后直接运行
chmod +x linuxcheck.sh
./linuxcheck.sh
定时巡检(crontab)
bash
crontab -e
添加以下行,每天早上 8 点自动执行并把结果发送邮件:
bash
0 8 * * * /root/linuxcheck.sh | mail -s "服务器巡检报告-$(date +\%Y\%m\%d)" admin@yourcompany.com
说明: 报告主体写入 /tmp/linux_check_YYYYMMDD_HHMMSS.txt;系统负载、连接状态和僵尸进程三项额外输出到终端,不写入文件。