1. systemd 基础概念解析
1.1 什么是 systemd
systemd 是现代 Linux 系统的初始化系统和服务管理器,它取代了传统的 SysV init 系统。systemd 不仅负责启动系统服务,还提供了强大的服务管理、依赖解析、日志记录和资源控制功能。
1.2 systemd 核心组件
- systemctl: 服务管理的主要命令行工具
- journalctl: 系统日志查看工具
- 单元文件 (Unit Files): 服务配置的核心文件
- 目标 (Targets): 类似运行级别的概念
1.3 单元类型详解
bash
# List every unit type this systemd build supports
systemctl --type=help
2. 创建自定义应用服务
2.1 示例应用:Python Web 服务
创建应用文件:/opt/myapp/app.py
python
#!/usr/bin/env python3
"""
自定义 Python Web 应用示例
用于演示 systemd 服务管理
"""
import http.server
import socketserver
import logging
import sys
import os
import signal
import time
from datetime import datetime
# Logging setup: every record is written both to the application log file
# and to stdout.
logging.basicConfig(
    handlers=[
        logging.FileHandler('/var/log/myapp/app.log'),
        logging.StreamHandler(sys.stdout),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger shared by the rest of this module.
logger = logging.getLogger('myapp')
class HealthCheckHandler(http.server.SimpleHTTPRequestHandler):
    """HTTP handler exposing ``/health`` (JSON) and ``/metrics`` (plain text).

    Any other GET path is answered with an empty 404.
    """

    def do_GET(self):
        """Dispatch a GET request on ``self.path``."""
        # Imported locally so the handler does not rely on a module-level
        # ``import json``.
        import json

        if self.path == '/health':
            payload = {
                'status': 'healthy',
                'timestamp': datetime.now().isoformat(),
                'pid': os.getpid(),
                # ``start_time`` is a module-level timestamp.
                'uptime': time.time() - start_time,
            }
            # The response is declared as application/json, so it must be
            # real JSON; str(dict) would emit a Python repr with single
            # quotes that JSON parsers reject.
            body = json.dumps(payload).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(body)
        elif self.path == '/metrics':
            metrics = (
                "# HELP myapp_requests_total Total requests served\n"
                "# TYPE myapp_requests_total counter\n"
                f"myapp_requests_total {self.server.request_count}\n"
                "# HELP myapp_uptime_seconds Application uptime in seconds\n"
                "# TYPE myapp_uptime_seconds gauge\n"
                f"myapp_uptime_seconds {time.time() - start_time}\n"
            )
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(metrics.encode())
        else:
            self.send_response(404)
            self.end_headers()
class MyHTTPServer(socketserver.TCPServer):
    """TCP server that counts the requests it has processed."""

    # Must be a class attribute: TCPServer.__init__ binds the socket and
    # reads this flag while doing so. Setting it on the instance after
    # super().__init__() returns is too late to have any effect.
    allow_reuse_address = True

    def __init__(self, server_address, RequestHandlerClass):
        super().__init__(server_address, RequestHandlerClass)
        # Number of requests handed to process_request() so far.
        self.request_count = 0

    def process_request(self, request, client_address):
        """Count and log the request, then let the base class handle it."""
        self.request_count += 1
        logger.info(f"处理请求 #{self.request_count} 来自 {client_address}")
        super().process_request(request, client_address)
class Application:
    """Owns the HTTP server and its start/stop lifecycle."""

    def __init__(self, host='localhost', port=8080):
        self.host = host
        self.port = port
        self.server = None
        self.is_running = False

    def start(self):
        """Create the server, install signal handlers and serve until stopped.

        Blocks in ``serve_forever()``; returns normally once ``stop()`` has
        shut the server down. Exits the process with status 1 if start-up
        raises.
        """
        try:
            logger.info(f"启动应用服务在 {self.host}:{self.port}")
            self.server = MyHTTPServer((self.host, self.port), HealthCheckHandler)
            self.is_running = True
            # Shut down gracefully on SIGTERM and SIGINT.
            signal.signal(signal.SIGTERM, self.signal_handler)
            signal.signal(signal.SIGINT, self.signal_handler)
            logger.info("应用服务启动完成")
            self.server.serve_forever()
        except Exception as e:
            logger.error(f"启动服务失败: {e}")
            sys.exit(1)

    def stop(self):
        """Stop the serve loop and close the listening socket.

        Must not be called from the thread that is running
        ``serve_forever()``: ``shutdown()`` waits for that loop to exit.
        """
        if self.server:
            logger.info("正在停止应用服务...")
            self.server.shutdown()
            self.server.server_close()
            self.is_running = False
            logger.info("应用服务已停止")

    def signal_handler(self, signum, frame):
        """Signal callback: request a graceful shutdown.

        Python runs signal handlers in the main thread, which here is the
        thread blocked in ``serve_forever()``. Calling ``stop()`` directly
        would therefore deadlock, so the shutdown is handed to a helper
        thread. Once the serve loop ends, ``start()`` returns and the
        process exits after the (non-daemon) helper has finished closing
        the socket.
        """
        import threading

        logger.info(f"接收到信号 {signum}, 正在关闭服务...")
        threading.Thread(target=self.stop, name='myapp-shutdown').start()
# Module-level state
start_time = time.time()  # process start timestamp, used for uptime figures
app = None                # the running Application, set by main()


def main(argv=None):
    """Parse command-line options and run the application.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Options:
        --host  Address to bind (default ``0.0.0.0``).
        --port  TCP port to listen on (default ``8080``).
    """
    global app
    # Imported locally so this block does not depend on a module-level import.
    import argparse

    parser = argparse.ArgumentParser(description='MyApp web service')
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--port', type=int, default=8080)
    args = parser.parse_args(argv)

    app = Application(host=args.host, port=args.port)
    app.start()


if __name__ == '__main__':
    main()
2.2 基础服务单元配置
创建服务文件:/etc/systemd/system/myapp.service
ini
[Unit]
Description=MyApp Custom Python Web Service
Documentation=https://example.com/docs/myapp
After=network.target network-online.target
Wants=network-online.target
# Start-rate limiting is a [Unit] setting: at most 10 starts per 100 seconds.
StartLimitIntervalSec=100
StartLimitBurst=10

[Service]
Type=simple
User=myapp
Group=myapp
# Directories under /run, /var/lib and /var/log that systemd creates for the service.
RuntimeDirectory=myapp
RuntimeDirectoryMode=0755
StateDirectory=myapp
StateDirectoryMode=0750
LogsDirectory=myapp
LogsDirectoryMode=0750

# Working directory and command line
WorkingDirectory=/opt/myapp
ExecStart=/usr/bin/python3 /opt/myapp/app.py
# No ExecReload=: app.py installs handlers for SIGTERM and SIGINT only, so a
# SIGHUP-based reload would terminate the process instead of reloading it.
# No ExecStop=: systemd stops the service with SIGTERM by default.

# Process management
Restart=always
RestartSec=5

# Sandboxing
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
# The leading "-" makes a missing data directory non-fatal at start-up.
ReadWritePaths=/var/log/myapp -/opt/myapp/data
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes

# Resource limits (MemoryMax= replaces the deprecated MemoryLimit=)
MemoryMax=512M
CPUQuota=150%
LimitNOFILE=65536
LimitNPROC=4096

# Environment
Environment="PYTHONPATH=/opt/myapp"
Environment="APP_ENV=production"
Environment="LOG_LEVEL=INFO"

# Standard output / error
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp

[Install]
WantedBy=multi-user.target
2.3 服务管理脚本
创建管理脚本:/usr/local/bin/myapp-manager.sh
bash
#!/bin/bash
# MyApp service manager: a single entry point for the everyday systemctl
# operations on the MyApp unit.
set -e

# Service under management and the filesystem layout it uses.
SERVICE_NAME="myapp.service"
APP_USER="myapp"
APP_DIR="/opt/myapp"
LOG_DIR="/var/log/myapp"
DATA_DIR="/opt/myapp/data"

# ANSI colour sequences, expanded when printed with escape processing.
NC='\033[0m' # reset / no colour
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
# Logging helpers: print "<coloured tag> <timestamp> - <message>" on stdout.
# Arguments: $1 - message text
log_info() {
  printf '%b\n' "${GREEN}[INFO]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1"
}

log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1"
}

log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1"
}
# Report whether the service is currently active.
# Globals: SERVICE_NAME (read)
# Returns: 0 when active, 1 otherwise
check_service_status() {
  log_info "检查服务状态..."
  if systemctl is-active --quiet "$SERVICE_NAME"; then
    log_info "服务正在运行"
    return 0
  fi
  log_error "服务未运行"
  return 1
}
# Start the service and wait until systemd reports it active.
# Globals: SERVICE_NAME (read)
# Returns: 0 once the unit is active; 1 if it is still not active after
#          10 polls, one second apart.
start_service() {
  local attempt
  log_info "启动 $SERVICE_NAME ..."
  sudo systemctl start "$SERVICE_NAME"
  # An arithmetic for-loop is used on purpose: a bare ((count++)) statement
  # returns status 1 while count is 0, which aborts the script under set -e.
  for (( attempt = 0; attempt < 10; attempt++ )); do
    if systemctl is-active --quiet "$SERVICE_NAME"; then
      log_info "服务启动成功"
      return 0
    fi
    sleep 1
  done
  log_error "服务启动超时"
  return 1
}
# Stop the service and wait until systemd no longer reports it active.
# Globals: SERVICE_NAME (read)
# Returns: 0 once the unit is inactive; 1 if it is still active after
#          10 polls, one second apart.
stop_service() {
  local attempt
  log_info "停止 $SERVICE_NAME ..."
  sudo systemctl stop "$SERVICE_NAME"
  # An arithmetic for-loop is used on purpose: a bare ((count++)) statement
  # returns status 1 while count is 0, which aborts the script under set -e.
  for (( attempt = 0; attempt < 10; attempt++ )); do
    if ! systemctl is-active --quiet "$SERVICE_NAME"; then
      log_info "服务停止成功"
      return 0
    fi
    sleep 1
  done
  log_error "服务停止超时"
  return 1
}
# Restart the service, pause two seconds, then report its status.
# Globals: SERVICE_NAME (read)
# Returns: the status of check_service_status
restart_service() {
  log_info "重启 $SERVICE_NAME ..."
  sudo systemctl restart "$SERVICE_NAME"
  # Give the unit a moment to settle before querying it.
  sleep 2
  check_service_status
}
# Make systemd re-read its unit files and clear the unit's failed state.
# Globals: SERVICE_NAME (read)
reload_service() {
  log_info "重载服务配置..."
  sudo systemctl daemon-reload
  sudo systemctl reset-failed "$SERVICE_NAME"
  log_info "服务配置重载完成"
}
# Print the most recent journal lines for the service and keep following.
# Globals:   SERVICE_NAME (read)
# Arguments: $1 - number of lines to show first (default 50 when empty/unset)
show_service_logs() {
  local tail_count="${1:-50}"
  log_info "显示服务最后 ${tail_count} 行日志:"
  sudo journalctl -u "$SERVICE_NAME" -n "$tail_count" -f
}
# Show the full, unpaged `systemctl status` output for the service.
# Globals: SERVICE_NAME (read)
# Returns: the exit status of systemctl status
show_service_status() {
  log_info "服务状态详情:"
  sudo systemctl status "$SERVICE_NAME" -l --no-pager
}
# Enable the service so it is started at boot.
# Globals: SERVICE_NAME (read)
enable_service() {
  log_info "启用开机自启..."
  sudo systemctl enable "$SERVICE_NAME"
  log_info "开机自启已启用"
}
# Disable the service so it is no longer started at boot.
# Globals: SERVICE_NAME (read)
disable_service() {
  log_info "禁用开机自启..."
  sudo systemctl disable "$SERVICE_NAME"
  log_info "开机自启已禁用"
}
# Verify the prerequisites for running the service.
# Checks, in order: python3 on PATH, the application directory, the
# application user. Stops at the first one that is missing.
# Globals: APP_DIR, APP_USER (read)
# Returns: 0 when everything is present, 1 at the first missing item
check_dependencies() {
  log_info "检查系统依赖..."

  command -v python3 &> /dev/null || {
    log_error "Python3 未安装"
    return 1
  }

  [ -d "$APP_DIR" ] || {
    log_error "应用目录不存在: $APP_DIR"
    return 1
  }

  id "$APP_USER" &> /dev/null || {
    log_error "应用用户不存在: $APP_USER"
    return 1
  }

  log_info "所有依赖检查通过"
  return 0
}
# Create the application user (if missing) and the application, log and
# data directories, then set their ownership and modes.
# Globals: APP_USER, APP_DIR, LOG_DIR, DATA_DIR (read)
setup_environment() {
  log_info "设置应用环境..."

  # System account with no login shell, created only when absent.
  id "$APP_USER" &> /dev/null || {
    log_info "创建应用用户: $APP_USER"
    sudo useradd -r -s /bin/false -d "$APP_DIR" "$APP_USER"
  }

  local -a managed_dirs=("$APP_DIR" "$LOG_DIR" "$DATA_DIR")
  sudo mkdir -p "${managed_dirs[@]}"
  sudo chown -R "$APP_USER:$APP_USER" "${managed_dirs[@]}"

  # Application directory is world-readable; logs and data are not.
  sudo chmod 755 "$APP_DIR"
  sudo chmod 750 "$LOG_DIR" "$DATA_DIR"

  log_info "环境设置完成"
}
# Archive the application, log and data directories plus the unit file into
# /var/backups/myapp/myapp_backup_<timestamp>.tar.gz.
# Paths that do not exist are skipped with a warning instead of making tar
# fail. tar exit status 1 (GNU tar: a file changed while being read, which is
# expected for live logs) is reported as a warning; any higher status is an
# error and the partial archive is removed.
# Globals: APP_DIR, LOG_DIR, DATA_DIR, SERVICE_NAME (read)
# Returns: 0 on success, 1 if nothing could be archived or tar failed
backup_service() {
  local backup_dir="/var/backups/myapp"
  local timestamp backup_file path rc=0
  local -a members=()

  timestamp=$(date '+%Y%m%d_%H%M%S')
  backup_file="myapp_backup_${timestamp}.tar.gz"
  log_info "开始备份服务数据..."

  for path in "$APP_DIR" "$LOG_DIR" "$DATA_DIR" "/etc/systemd/system/$SERVICE_NAME"; do
    if [[ -e "$path" ]]; then
      # Member names are relative to "/" (see -C below).
      members+=("${path#/}")
    else
      log_warn "跳过不存在的路径: $path"
    fi
  done
  if (( ${#members[@]} == 0 )); then
    log_error "没有可备份的内容"
    return 1
  fi

  sudo mkdir -p "$backup_dir"
  sudo tar -czf "$backup_dir/$backup_file" -C / "${members[@]}" || rc=$?
  if (( rc == 1 )); then
    log_warn "备份期间部分文件发生变化"
  elif (( rc != 0 )); then
    sudo rm -f -- "$backup_dir/$backup_file"
    log_error "备份失败 (tar 退出码: $rc)"
    return 1
  fi

  sudo chown root:root "$backup_dir/$backup_file"
  log_info "备份完成: $backup_dir/$backup_file"
}
# Print the command overview.
# Globals: CYAN, GREEN, NC (read)
show_usage() {
  local entry
  printf '%b\n' "${CYAN}MyApp 服务管理器${NC}"
  printf '\n'
  printf '%s\n' "使用方法: $0 [命令]"
  printf '\n'
  printf '%s\n' "可用命令:"
  # Each entry is "<command> <description>"; only the command is coloured.
  for entry in \
    "start 启动服务" \
    "stop 停止服务" \
    "restart 重启服务" \
    "status 查看服务状态" \
    "logs 查看服务日志" \
    "enable 启用开机自启" \
    "disable 禁用开机自启" \
    "reload 重载服务配置" \
    "setup 设置应用环境" \
    "backup 备份服务数据" \
    "check 检查服务依赖" \
    "monitor 实时监控服务"; do
    printf '%b\n' " ${GREEN}${entry%% *}${NC} ${entry#* }"
  done
  printf '\n'
}
# Refresh the service's `systemctl status` output every two seconds until
# interrupted.
# Globals: SERVICE_NAME (read)
monitor_service() {
  log_info "启动实时监控 (Ctrl+C 退出)"
  watch -n 2 "systemctl status $SERVICE_NAME --no-pager"
}
# Dispatch the sub-command given on the command line.
# Arguments: $1 - command name (default "status")
#            $2 - line count, passed through to the "logs" command
# Exits with status 1 after printing the usage text for an unknown command.
main() {
  local command=${1:-"status"}

  case "$command" in
    start)   start_service ;;
    stop)    stop_service ;;
    restart) restart_service ;;
    status)  show_service_status ;;
    logs)    show_service_logs "$2" ;;
    enable)  enable_service ;;
    disable) disable_service ;;
    reload)  reload_service ;;
    setup)   setup_environment ;;
    backup)  backup_service ;;
    check)   check_dependencies ;;
    monitor) monitor_service ;;
    help | --help | -h)
      show_usage
      ;;
    *)
      log_error "未知命令: $command"
      show_usage
      exit 1
      ;;
  esac
}
# Entry point: dispatch only when the file is executed directly, so that it
# can also be sourced for its functions.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
3. 高级服务配置模式
3.1 多实例服务配置
创建模板服务文件:/etc/systemd/system/myapp@.service
ini
[Unit]
Description=MyApp Instance %i
Documentation=https://example.com/docs/myapp
# Wants= alone does not order the units; After= is needed as well.
After=network.target network-online.target
Wants=network-online.target
# Start-rate limiting is a [Unit] setting: at most 5 starts per 100 seconds.
StartLimitIntervalSec=100
StartLimitBurst=5

[Service]
Type=simple
User=myapp
Group=myapp

# Per-instance directories. The shared "myapp" logs directory is listed too
# because app.py opens /var/log/myapp/app.log, which would otherwise be
# read-only under ProtectSystem=strict.
RuntimeDirectory=myapp-%i
StateDirectory=myapp-%i
LogsDirectory=myapp myapp-%i

# Port derived from the instance name: instance 0 -> 8080, 1 -> 8180, ...
# Only single-digit instance names yield a valid port number.
Environment=INSTANCE_PORT=8%i80
WorkingDirectory=/opt/myapp
ExecStart=/usr/bin/python3 /opt/myapp/app.py --port ${INSTANCE_PORT}
# No ExecReload=: app.py installs handlers for SIGTERM and SIGINT only, so a
# SIGHUP-based reload would terminate the process instead of reloading it.

# Process management
Restart=always
RestartSec=5

# Sandboxing
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
# The leading "-" makes a missing per-instance data directory non-fatal.
ReadWritePaths=/var/log/myapp-%i -/opt/myapp/data-%i

# Resource limits (MemoryMax= replaces the deprecated MemoryLimit=)
MemoryMax=256M
CPUQuota=100%
LimitNOFILE=32768

# Environment
Environment="APP_ENV=production"
Environment="INSTANCE_NAME=%i"
Environment="LOG_LEVEL=INFO"
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-%i

[Install]
WantedBy=multi-user.target
3.2 依赖关系管理
创建依赖服务文件:/etc/systemd/system/myapp-dependencies.service
ini
[Unit]
Description=MyApp Dependencies Pre-start
# Declarative dependencies replace the former polling loops: systemd starts
# PostgreSQL and Redis, orders this unit after them, and fails fast if either
# cannot be started (a oneshot unit has no start timeout by default, so the
# loops could wait forever).
Requires=network.target postgresql.service redis-server.service
After=network.target postgresql.service redis-server.service
Before=myapp.service

[Service]
Type=oneshot
RemainAfterExit=yes
User=root
Group=root

# Create the directories the application writes to
ExecStart=/usr/bin/mkdir -p /var/log/myapp /opt/myapp/data
ExecStart=/usr/bin/chown myapp:myapp /var/log/myapp /opt/myapp/data
ExecStart=/usr/bin/chmod 750 /var/log/myapp /opt/myapp/data

# Short grace period before dependants start. The former HTTP probe of port
# 5432 was dropped: PostgreSQL does not speak HTTP and the probe's result was
# discarded anyway.
ExecStartPost=/bin/sleep 5

[Install]
RequiredBy=myapp.service
4. 定时器服务配置
4.1 备份定时器
创建备份服务文件:/etc/systemd/system/myapp-backup.service
ini
[Unit]
Description=MyApp Data Backup Service
Documentation=https://example.com/docs/myapp/backup
After=network.target
Requires=myapp.service

[Service]
Type=oneshot
User=backup
Group=backup

# Environment
Environment=BACKUP_DIR=/var/backups/myapp
Environment=RETENTION_DAYS=30

# Run the backup, then prune archives older than RETENTION_DAYS
ExecStart=/usr/local/bin/myapp-backup.sh
ExecStartPost=/usr/bin/find ${BACKUP_DIR} -name "myapp_backup_*.tar.gz" -mtime +${RETENTION_DAYS} -delete

# Standard output / error
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-backup

# Exit status 1 is also treated as success
SuccessExitStatus=0 1

# No [Install] section on purpose: this unit is activated by
# myapp-backup.timer. Enabling it under multi-user.target would run an extra
# backup on every boot.
创建备份定时器:/etc/systemd/system/myapp-backup.timer
ini
[Unit]
Description=MyApp Daily Backup Timer
Documentation=https://example.com/docs/myapp/backup
# No Requires=myapp-backup.service here: that would start the backup
# immediately every time the timer itself is started (for example at boot).
# The timer activates the service through Unit= below.

[Timer]
# Run every day at 02:00
OnCalendar=*-*-* 02:00:00
# Random delay of 0-30 minutes so several services do not back up at once
RandomizedDelaySec=1800
# Run at once after boot if a scheduled run was missed
Persistent=true
# Scheduling accuracy
AccuracySec=1h
# Unit to activate
Unit=myapp-backup.service

[Install]
WantedBy=timers.target
4.2 健康检查定时器
创建健康检查服务:/etc/systemd/system/myapp-healthcheck.service
ini
[Unit]
Description=MyApp Health Check Service
After=network.target myapp.service

[Service]
Type=oneshot
User=myapp
Group=myapp

# Health probe; a non-2xx answer or a connection failure fails this unit.
ExecStart=/usr/bin/curl -f -s --max-time 10 http://localhost:8080/health
# React to a failed probe. ExecStartPost= cannot do this: it only runs after
# ExecStart= succeeded, and "$?" inside a fresh shell is always 0.
# ExecStopPost= also runs when start-up failed and receives the outcome in
# $SERVICE_RESULT ("$$" passes a literal "$" to the shell). The "+" prefix
# runs the command with full privileges, which restarting a unit requires.
ExecStopPost=+/bin/sh -c 'if [ "$$SERVICE_RESULT" != "success" ]; then exec systemctl --no-block restart myapp.service; fi'

# Timeout
TimeoutStartSec=30
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-healthcheck
创建健康检查定时器:/etc/systemd/system/myapp-healthcheck.timer
ini
[Unit]
Description=MyApp Health Check Timer
# No Requires=myapp-healthcheck.service here: that would run the check
# immediately every time the timer itself is started.

[Timer]
# First run 5 minutes after boot, then 5 minutes after each activation
OnBootSec=5min
OnUnitActiveSec=5min
# Scheduling accuracy
AccuracySec=1m
# Unit to activate
Unit=myapp-healthcheck.service

[Install]
WantedBy=timers.target
5. 路径和套接字激活
5.1 文件变化监控服务
创建路径监控单元:/etc/systemd/system/myapp-config-watcher.path
ini
[Unit]
Description=MyApp Configuration File Watcher
Documentation=https://example.com/docs/myapp/config
After=myapp.service

[Path]
# Trigger when a configuration file has been changed
PathChanged=/etc/myapp/config.yaml
PathChanged=/opt/myapp/settings.ini
# Trigger while the queue directory contains entries
# NOTE(review): myapp-config-reload.service does not empty this directory, so
# the unit may be re-triggered repeatedly while entries remain; confirm this
# is intended.
DirectoryNotEmpty=/opt/myapp/queue
# Trigger when the restart flag file exists
PathExists=/opt/myapp/restart.flag
# MakeDirectory=yes was removed: it creates each watched path as a directory,
# which would turn the configuration file paths above into directories when
# the files do not exist yet.
Unit=myapp-config-reload.service

[Install]
WantedBy=multi-user.target
创建配置重载服务:/etc/systemd/system/myapp-config-reload.service
ini
[Unit]
Description=MyApp Configuration Reload
Documentation=https://example.com/docs/myapp/config

[Service]
Type=oneshot
# Runs as root (no User=/Group=) because it manages another unit.
# The former "kill -HUP $MAINPID" could not work: $MAINPID is only provided
# by systemd to a service's own control commands, and here it had been set to
# a PID-file path rather than a process ID.
# Ask systemd to reload the application if the unit supports it, otherwise
# restart it; nothing happens if the unit is not running.
ExecStart=/bin/systemctl try-reload-or-restart myapp.service
# Clear the trigger flag so the path unit does not fire again
ExecStart=/bin/rm -f /opt/myapp/restart.flag
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-config-reload
5.2 套接字激活服务
创建套接字单元:/etc/systemd/system/myapp.socket
ini
[Unit]
Description=MyApp Socket Activation
Documentation=https://example.com/docs/myapp/socket
Before=myapp.service
# NOTE(review): app.py as shown creates and binds its own listening socket
# and does not take over a socket passed by systemd, so it would collide with
# this unit on port 8080. Confirm the application supports socket activation
# before enabling this unit.

[Socket]
# Listeners
ListenStream=0.0.0.0:8080
ListenStream=[::]:8080
# Keep the IPv6 listener IPv6-only so it does not also claim the IPv4 port
# bound by the listener above.
BindIPv6Only=ipv6-only
# One long-running service receives the listening socket. Accept=yes would
# spawn a myapp@.service instance per connection and cannot be combined with
# Service=.
Accept=no
# SocketUser=, SocketGroup=, SocketMode= and RemoveOnStop= were dropped: they
# only affect sockets and FIFOs that live in the file system.

# Buffers and TCP options
ReceiveBuffer=8M
SendBuffer=8M
KeepAlive=yes
NoDelay=yes
Backlog=4096
Service=myapp.service

[Install]
WantedBy=sockets.target
Also=myapp.service
6. 高级资源管理和安全配置
6.1 资源限制服务配置
创建资源限制服务:/etc/systemd/system/myapp-with-limits.service
ini
[Unit]
Description=MyApp with Resource Limits
Documentation=https://example.com/docs/myapp/resources
After=network.target

[Service]
# --- What to run ---
Type=simple
User=myapp
Group=myapp
WorkingDirectory=/opt/myapp
ExecStart=/usr/bin/python3 /opt/myapp/app.py
Restart=always

# --- CPU ---
CPUWeight=100
StartupCPUWeight=500
CPUQuota=200%
CPUAffinity=0-3

# --- Memory ---
MemoryHigh=800M
MemoryMax=1G
MemorySwapMax=500M

# --- Block I/O ---
IOWeight=100
StartupIOWeight=500
IODeviceWeight=/dev/sda 200
IOReadBandwidthMax=/dev/sda 50M
IOWriteBandwidthMax=/dev/sda 50M

# --- Number of tasks ---
TasksMax=2000

# --- Privileges and capabilities ---
NoNewPrivileges=yes
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
RestrictSUIDSGID=yes
RestrictRealtime=yes
RestrictNamespaces=yes
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
LockPersonality=yes
SystemCallArchitectures=native
SystemCallFilter=@system-service

# --- Isolation from the host ---
PrivateTmp=yes
PrivateDevices=yes
PrivateUsers=yes
ProtectSystem=strict
ProtectHome=yes
ProtectHostname=yes
ProtectClock=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectKernelLogs=yes
ProtectControlGroups=yes
ProtectProc=invisible
ProcSubset=pid

# --- File-system access ---
ReadOnlyPaths=/opt/myapp
ReadWritePaths=/var/log/myapp /opt/myapp/data
InaccessiblePaths=-/etc/secrets

# --- Logging ---
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-limited

[Install]
WantedBy=multi-user.target
6.2 系统资源监控脚本
创建资源监控脚本:/usr/local/bin/myapp-resource-monitor.sh
bash
#!/bin/bash
# MyApp resource monitor: watches the service's resource usage and adjusts
# its limits automatically.
set -e

# Monitored unit and where this script logs.
SERVICE_NAME="myapp.service"
LOG_FILE="/var/log/myapp/resource-monitor.log"

# Alert thresholds, in percent.
ALERT_THRESHOLD_CPU=90
ALERT_THRESHOLD_MEMORY=85
ALERT_THRESHOLD_DISK=80

# ANSI colour sequences; NC resets the colour.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Append a timestamped message to LOG_FILE and echo it to stdout with a
# coloured [MONITOR] tag.
# Globals:   LOG_FILE, GREEN, NC (read)
# Arguments: $1 - message text
log() {
  printf '%s\n' "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
  printf '%b\n' "${GREEN}[MONITOR]${NC} $1"
}
# Record an alert through log() and print it with a coloured [ALERT] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - alert text
alert() {
  local text="$1"
  log "ALERT: $text"
  # Hook point for e-mail, Slack or other notifications.
  printf '%b\n' "${RED}[ALERT]${NC} $text"
}
# Print the main process's resource usage as "<cpu%> <mem%> <rss_kb>".
# Globals: SERVICE_NAME (read)
# Outputs: the three fields on stdout; the "not running" notice on stderr so
#          that callers capturing stdout never mistake it for data
# Returns: 0 on success; 1 when the unit has no main process (MainPID is 0,
#          empty or not a number)
get_service_resources() {
  local pid cpu_usage memory_usage memory_kb

  pid=$(systemctl show --property MainPID "$SERVICE_NAME" | cut -d= -f2)
  if [[ ! "$pid" =~ ^[0-9]+$ ]] || (( 10#$pid == 0 )); then
    echo "服务未运行" >&2
    return 1
  fi

  # Fall back to "0" if the process vanished between the two calls.
  cpu_usage=$(ps -p "$pid" -o %cpu --no-headers 2>/dev/null || echo "0")
  memory_usage=$(ps -p "$pid" -o %mem --no-headers 2>/dev/null || echo "0")
  memory_kb=$(ps -p "$pid" -o rss --no-headers 2>/dev/null || echo "0")

  # ps pads its columns; strip the padding so exactly three
  # single-space-separated fields are printed.
  echo "${cpu_usage//[[:space:]]/} ${memory_usage//[[:space:]]/} ${memory_kb//[[:space:]]/}"
}
# Print system-wide usage as "<cpu%> <mem%> <disk%>".
# The CPU figure is the sum of fields 2 and 4 of top's "Cpu(s)" line, the
# memory figure is column 3 over column 2 of free's second line, and the disk
# figure is the use% column of `df /`.
check_system_resources() {
  local cpu_pct mem_pct disk_pct

  cpu_pct=$(top -bn1 | awk '/Cpu\(s\)/ {print $2 + $4}')
  mem_pct=$(free | awk 'NR==2{printf "%.2f", $3*100/$2}')
  disk_pct=$(df / | awk 'NR==2{sub(/%/, "", $5); print $5}')

  echo "$cpu_pct $mem_pct $disk_pct"
}
# Raise the unit's MemoryMax by 20% and/or its CPUWeight by 10% when the
# measured usage exceeds the alert thresholds.
# Globals:   SERVICE_NAME, ALERT_THRESHOLD_MEMORY, ALERT_THRESHOLD_CPU (read)
# Arguments: $1 - CPU usage in percent (may be fractional)
#            $2 - memory usage in percent (may be fractional)
#            $3 - resident memory in KB (logged only)
adjust_resource_limits() {
  local cpu_usage=$1
  local memory_usage=$2
  local memory_kb=$3
  local current_limit new_limit current_weight new_weight

  log "当前资源使用: CPU=${cpu_usage}%, 内存=${memory_usage}%, 内存大小=${memory_kb}KB"

  # Memory over threshold: raise the limit by 20%.
  if _usage_exceeds "$memory_usage" "$ALERT_THRESHOLD_MEMORY"; then
    current_limit=$(systemctl show --property MemoryMax "$SERVICE_NAME" | cut -d= -f2)
    if [[ "$current_limit" =~ ^[0-9]+$ ]]; then
      # systemctl reports MemoryMax in bytes, so the new value is written
      # back in bytes too. Appending an "M" suffix would multiply the limit
      # by roughly a million.
      new_limit=$((current_limit * 120 / 100))
      log "内存使用过高,调整内存限制从 ${current_limit} 到 ${new_limit}"
      systemctl set-property "$SERVICE_NAME" MemoryMax="${new_limit}"
      alert "内存使用率 ${memory_usage}% 超过阈值 ${ALERT_THRESHOLD_MEMORY}%,已自动调整限制"
    else
      # "infinity" (or anything non-numeric) means there is no limit to raise.
      alert "内存使用率 ${memory_usage}% 超过阈值 ${ALERT_THRESHOLD_MEMORY}%,未设置内存限制,跳过调整"
    fi
  fi

  # CPU over threshold: raise the weight by 10%, capped at 1000.
  if _usage_exceeds "$cpu_usage" "$ALERT_THRESHOLD_CPU"; then
    current_weight=$(systemctl show --property CPUWeight "$SERVICE_NAME" | cut -d= -f2)
    # An unset weight is reported as "[not set]" or as a huge sentinel
    # number; fall back to systemd's default weight of 100 in that case.
    if [[ ! "$current_weight" =~ ^[0-9]+$ ]] || (( ${#current_weight} > 5 )); then
      current_weight=100
    fi
    new_weight=$((current_weight * 110 / 100))
    if [ "$new_weight" -gt 1000 ]; then
      new_weight=1000
    fi
    log "CPU使用过高,调整CPU权重从 ${current_weight} 到 ${new_weight}"
    systemctl set-property "$SERVICE_NAME" CPUWeight="$new_weight"
    alert "CPU使用率 ${cpu_usage}% 超过阈值 ${ALERT_THRESHOLD_CPU}%,已调整CPU权重"
  fi
}

# True when the (possibly fractional) number $1 is greater than $2.
_usage_exceeds() {
  awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}
# Check that the unit is active and that its health endpoint answers;
# restart the unit when either check fails.
# Globals: SERVICE_NAME (read)
# Returns: 0 when healthy, 1 after a restart was triggered
check_service_health() {
  local problem=""

  if ! systemctl is-active --quiet "$SERVICE_NAME"; then
    problem="服务 $SERVICE_NAME 未运行,尝试重启..."
  elif ! curl -f -s http://localhost:8080/health > /dev/null; then
    # The unit is active but its HTTP health endpoint did not answer.
    problem="服务健康检查失败,尝试重启..."
  fi

  if [ -z "$problem" ]; then
    return 0
  fi

  alert "$problem"
  systemctl restart "$SERVICE_NAME"
  return 1
}
# Write a usage report to /var/log/myapp/resource-report-<YYYYMMDD>.log,
# replacing any report already written that day.
# Globals: SERVICE_NAME (read)
generate_resource_report() {
  local target="/var/log/myapp/resource-report-$(date +%Y%m%d).log"

  {
    # Header
    printf '%s\n' "=== MyApp 资源使用报告 ==="
    printf '%s\n' "生成时间: $(date)"
    printf '%s\n' "服务状态: $(systemctl is-active $SERVICE_NAME)"
    printf '\n'

    # System-wide figures
    printf '%s\n' "系统资源:"
    printf '%s\n' "- CPU使用率: $(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}')%"
    printf '%s\n' "- 内存使用率: $(free | awk 'NR==2{printf "%.2f", $3*100/$2}')%"
    printf '%s\n' "- 磁盘使用率: $(df / | awk 'NR==2{print $5}')"
    printf '\n'

    # Unit properties mentioning Memory, CPU or Tasks, minus lines containing "=0"
    printf '%s\n' "服务资源:"
    systemctl show "$SERVICE_NAME" | grep -E "(Memory|CPU|Tasks)" | grep -v "=0"
    printf '\n'

    # Last ten journal lines from the past hour
    printf '%s\n' "最近日志:"
    journalctl -u "$SERVICE_NAME" --since="1 hour ago" | tail -10
  } > "$target"

  log "资源报告已生成: $target"
}
# Main monitoring loop; runs until the process is killed.
# Each round: health-check the service, read its resource usage, adjust the
# limits, and write a report once per clock hour. After a failed health check
# or a failed usage read the round is retried after 30 seconds; otherwise the
# loop sleeps 60 seconds.
monitor_loop() {
  local service_resources cpu_usage memory_usage memory_kb
  local current_hour last_report_hour

  log "启动资源监控..."
  # Start from the current hour so the first report is written when the next
  # hour begins, not immediately.
  last_report_hour=$(date +%Y%m%d%H)

  while true; do
    # Health check
    if ! check_service_health; then
      sleep 30
      continue
    fi

    # Read resource usage. The assignment is kept separate from `local`,
    # because `local var=$(cmd)` always succeeds and would hide a failure.
    if ! service_resources=$(get_service_resources); then
      sleep 30
      continue
    fi
    read -r cpu_usage memory_usage memory_kb <<< "$service_resources"

    # Adjust limits
    adjust_resource_limits "$cpu_usage" "$memory_usage" "$memory_kb"

    # One report per hour. Comparing the hour stamp, rather than testing for
    # minute "00", cannot skip an hour when a round runs long and cannot
    # report twice within the same minute.
    current_hour=$(date +%Y%m%d%H)
    if [[ "$current_hour" != "$last_report_hour" ]]; then
      generate_resource_report
      last_report_hour=$current_hour
    fi

    sleep 60
  done
}
# Print the command overview.
show_usage() {
  printf '%s\n' \
    "MyApp 资源监控脚本" \
    "" \
    "使用方法: $0 [命令]" \
    "" \
    "命令:" \
    " start 启动监控" \
    " stop 停止监控" \
    " status 查看监控状态" \
    " report 生成资源报告" \
    " stats 显示当前统计"
}
# Dispatch the sub-command given on the command line.
# Arguments: $1 - start | stop | status | report | stats | monitor
#                 (default "start"; "monitor" is the internal command the
#                 background worker is launched with)
main() {
  local command=${1:-"start"}
  # Matches only the background worker, whose command line ends in
  # "<script> monitor". Matching the bare script name would also match the
  # current invocation: "start" would always refuse to run, "status" would
  # always report running, and "stop" would kill itself.
  local worker_pattern='myapp-resource-monitor[^ ]* monitor$'

  case $command in
    "start")
      # Refuse to start a second worker
      if pgrep -f "$worker_pattern" > /dev/null; then
        echo "监控脚本已经在运行"
        exit 1
      fi
      # Run the monitoring loop in the background
      nohup "$0" monitor > /dev/null 2>&1 &
      echo "监控脚本已启动 (PID: $!)"
      ;;
    "stop")
      if pkill -f "$worker_pattern"; then
        echo "监控脚本已停止"
      else
        echo "监控脚本未运行"
      fi
      ;;
    "status")
      if pgrep -f "$worker_pattern" > /dev/null; then
        echo "监控脚本正在运行"
      else
        echo "监控脚本未运行"
      fi
      ;;
    "report")
      generate_resource_report
      ;;
    "stats")
      echo "当前资源使用情况:"
      get_service_resources
      echo "系统资源情况:"
      check_system_resources
      ;;
    "monitor")
      monitor_loop
      ;;
    *)
      show_usage
      exit 1
      ;;
  esac
}
# Entry point: dispatch only when the file is executed directly, so that it
# can also be sourced for its functions.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
7. 部署和维护工具
7.1 自动化部署脚本
创建部署脚本:/usr/local/bin/deploy-myapp.sh
bash
#!/bin/bash
# MyApp automated deployment script
set -e

# Configuration
APP_NAME="myapp"
APP_USER="myapp"
APP_DIR="/opt/myapp"
SERVICE_FILE="/etc/systemd/system/myapp.service"
BACKUP_DIR="/var/backups/myapp"
# Version to deploy: the first argument wins, then the DEPLOY_VERSION
# environment variable (which the usage text advertises), then "latest".
DEPLOY_VERSION="${1:-${DEPLOY_VERSION:-latest}}"

# ANSI colour sequences; NC resets the colour.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Logging helpers: print "<coloured tag> <timestamp> - <message>" on stdout.
# Arguments: $1 - message text
log() {
  printf '%b\n' "${GREEN}[DEPLOY]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1"
}

# Like log(), but tagged [ERROR]; terminates the script with status 1.
error() {
  printf '%b\n' "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1"
  exit 1
}

warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1"
}
# Abort (through error) unless the script is running as root.
check_permissions() {
  [[ $EUID -eq 0 ]] || error "需要 root 权限运行此脚本"
}
# Archive the currently installed version into
# BACKUP_DIR/myapp_<timestamp>.tar.gz before anything is changed.
# rollback_deployment restores from this archive, so a failed backup stops
# the deployment instead of being silently ignored. Paths that do not exist
# (for example on a first deployment) are left out of the archive. tar exit
# status 1 (GNU tar: a file changed while being read) is only a warning.
# Globals: APP_DIR, BACKUP_DIR (read)
backup_current_version() {
  local timestamp backup_file path rc=0
  local -a members=()

  log "备份当前版本..."
  timestamp=$(date '+%Y%m%d_%H%M%S')
  backup_file="${BACKUP_DIR}/myapp_${timestamp}.tar.gz"
  mkdir -p "$BACKUP_DIR"

  if [ ! -d "$APP_DIR" ]; then
    warn "应用目录不存在,跳过备份"
    return 0
  fi

  for path in "$APP_DIR" "/etc/systemd/system/myapp.service" "/var/log/myapp"; do
    if [[ -e "$path" ]]; then
      # Member names are relative to "/" (see -C below).
      members+=("${path#/}")
    fi
  done

  tar -czf "$backup_file" -C "/" "${members[@]}" || rc=$?
  if (( rc == 1 )); then
    warn "备份期间部分文件发生变化"
  elif (( rc != 0 )); then
    rm -f -- "$backup_file"
    error "备份失败 (tar 退出码: $rc)"
  fi
  log "备份完成: $backup_file"
}
# Create the application's system account (no login shell, home = APP_DIR)
# unless it already exists.
# Globals: APP_USER, APP_DIR (read)
create_app_user() {
  id "$APP_USER" &> /dev/null || {
    log "创建应用用户: $APP_USER"
    useradd -r -s /bin/false -d "$APP_DIR" "$APP_USER"
  }
}
# Create the directories the application needs, owned by the application
# user. Every directory gets mode 755 except the log directory, which gets
# 750 in a single step, so it is never world-readable in between.
# Globals: APP_DIR, APP_USER (read)
create_directories() {
  # Declared local so the loop variable does not leak into the script's
  # global scope.
  local dir mode
  local directories=(
    "$APP_DIR"
    "/var/log/myapp"
    "/opt/myapp/data"
    "/etc/myapp"
  )

  log "创建目录结构..."
  for dir in "${directories[@]}"; do
    mode=755
    if [[ "$dir" == "/var/log/myapp" ]]; then
      mode=750
    fi
    mkdir -p "$dir"
    chown "$APP_USER:$APP_USER" "$dir"
    chmod "$mode" "$dir"
  done
}
# Stop the running service and copy the new release into APP_DIR.
# If the copy fails, the previous version is restored through
# rollback_deployment.
# Globals: APP_NAME, APP_DIR, APP_USER, DEPLOY_VERSION (read)
# Returns: 0 on success, 1 if the copy failed
deploy_new_version() {
  # NOTE(review): a fixed path under /tmp is writable by any local user;
  # confirm where release files are really staged before using this as root.
  local source_dir="/tmp/myapp-new-version"

  log "部署版本: $DEPLOY_VERSION"
  # A real deployment would fetch the release from a repository here.

  # Stop the current service
  if systemctl is-active --quiet "$APP_NAME"; then
    log "停止当前服务..."
    systemctl stop "$APP_NAME"
  fi

  # Copy the release. The copy is recursive because setup_system_service
  # reads systemd/myapp.service from inside APP_DIR; a plain `cp dir/*`
  # fails on sub-directories.
  log "部署应用文件..."
  if ! cp -R "$source_dir"/. "$APP_DIR/"; then
    # warn() rather than error(): error() exits at once, which would make
    # the rollback below unreachable.
    warn "部署文件失败,开始回滚..."
    rollback_deployment
    return 1
  fi

  # Ownership and permissions
  chown -R "$APP_USER:$APP_USER" "$APP_DIR"
  chmod -R 755 "$APP_DIR"
  chmod +x "$APP_DIR/app.py"
}
# Install the unit file shipped with the release and enable the service.
# Globals: APP_DIR, SERVICE_FILE, APP_NAME (read)
setup_system_service() {
  local shipped_unit="${APP_DIR}/systemd/myapp.service"

  log "配置系统服务..."
  cp "$shipped_unit" "$SERVICE_FILE"
  # Make systemd pick up the new unit file before enabling it.
  systemctl daemon-reload
  systemctl enable "$APP_NAME"
  log "系统服务配置完成"
}
# Start the service and wait until systemd reports it active; roll back to
# the previous version if it does not come up.
# Globals: APP_NAME (read)
# Returns: 0 once the unit is active; 1 if it is still not active after
#          10 polls, three seconds apart (the rollback has run by then)
start_application() {
  local attempt

  log "启动应用服务..."
  # A failing start must not abort the script under set -e: the polling loop
  # below detects it and triggers the rollback.
  systemctl start "$APP_NAME" || true

  # An arithmetic for-loop is used on purpose: a bare ((attempts++))
  # statement returns status 1 while the counter is 0, which aborts the
  # script under set -e.
  for (( attempt = 0; attempt < 10; attempt++ )); do
    if systemctl is-active --quiet "$APP_NAME"; then
      log "服务启动成功"
      return 0
    fi
    sleep 3
  done

  # warn() rather than error(): error() exits at once, which would make the
  # rollback below unreachable.
  warn "服务启动失败,开始回滚..."
  rollback_deployment
  return 1
}
# Poll the application's health endpoint; roll back to the previous version
# if it never answers.
# Returns: 0 as soon as the endpoint answers successfully; 1 after 10 failed
#          polls, five seconds apart (the rollback has run by then)
health_check() {
  local attempt

  log "执行健康检查..."
  # An arithmetic for-loop is used on purpose: a bare ((attempts++))
  # statement returns status 1 while the counter is 0, which aborts the
  # script under set -e.
  for (( attempt = 0; attempt < 10; attempt++ )); do
    # --max-time keeps a hung endpoint from blocking the deployment.
    if curl -f -s --max-time 5 http://localhost:8080/health > /dev/null; then
      log "健康检查通过"
      return 0
    fi
    sleep 5
  done

  # warn() rather than error(): error() exits at once, which would make the
  # rollback below unreachable.
  warn "健康检查失败,开始回滚..."
  rollback_deployment
  return 1
}
# Restore the most recent backup archive and restart the service.
# Always terminates the script with status 1: either through error() when no
# backup exists, or through the final exit after the restore.
# Globals: BACKUP_DIR, APP_NAME (read)
rollback_deployment() {
  local newest_archive

  log "开始回滚部署..."

  # Newest archive by modification time.
  newest_archive=$(ls -t "${BACKUP_DIR}/myapp_"*.tar.gz 2>/dev/null | head -n 1)
  [ -n "$newest_archive" ] || error "找不到备份文件,无法回滚"

  log "恢复备份: $newest_archive"
  # The service may already be down; a failing stop is not an error here.
  systemctl stop "$APP_NAME" 2>/dev/null || true
  tar -xzf "$newest_archive" -C "/"
  systemctl start "$APP_NAME"
  log "回滚完成"
  exit 1
}
# Delete backup archives whose modification time is more than 7 days old.
# Globals: BACKUP_DIR (read)
cleanup_old_backups() {
  local keep_days=7

  log "清理旧备份..."
  find "$BACKUP_DIR" -name "myapp_*.tar.gz" -mtime "+${keep_days}" -delete
  log "备份清理完成"
}
# Print a post-deployment summary: unit state, enablement, deployed version,
# the current time, and the last five journal lines of the service.
# Globals: APP_NAME, DEPLOY_VERSION (read)
show_deployment_status() {
  log "=== 部署状态 ==="
  printf '%s\n' \
    "服务状态: $(systemctl is-active $APP_NAME)" \
    "服务启用状态: $(systemctl is-enabled $APP_NAME)" \
    "应用版本: $DEPLOY_VERSION" \
    "部署时间: $(date)"

  # Tail of the service journal
  log "最近服务日志:"
  journalctl -u "$APP_NAME" -n 5 --no-pager
}
# Run the whole deployment, one step after another, in this fixed order.
main_deployment() {
  local step
  local -a steps=(
    check_permissions
    backup_current_version
    create_app_user
    create_directories
    deploy_new_version
    setup_system_service
    start_application
    health_check
    cleanup_old_backups
    show_deployment_status
  )

  log "开始部署 MyApp..."
  for step in "${steps[@]}"; do
    "$step"
  done
  log "部署完成!"
}
# Print the usage text.
show_usage() {
  printf '%s\n' \
    "MyApp 自动化部署脚本" \
    "" \
    "使用方法: $0 [版本]" \
    "" \
    "示例:" \
    " $0 v1.2.3 部署指定版本" \
    " $0 部署最新版本" \
    "" \
    "环境变量:" \
    " DEPLOY_VERSION 部署版本号"
}
# Entry point: act only when the file is executed directly, not when it is
# sourced. -h/--help prints the usage text; anything else deploys.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  case "$1" in
    -h | --help)
      show_usage
      exit 0
      ;;
  esac
  main_deployment
fi
8. 总结
通过本文的详细介绍,您已经掌握了使用 systemd 管理自定义应用服务的完整知识体系:
- 基础服务配置 - 创建标准的 systemd 服务单元文件
- 高级服务模式 - 多实例服务、依赖管理、模板服务
- 定时器服务 - 自动化任务和健康检查
- 路径和套接字激活 - 事件驱动的服务管理
- 资源管理和安全 - 完整的资源限制和安全沙盒配置
- 监控和维护 - 自动化监控、部署和维护工具
通过 systemd 的强大功能,您可以确保自定义应用的稳定性、安全性和可维护性。