终极武器:用 systemd 管理你的自定义应用服务(附配置文件)

graph TD
    A[systemd服务管理] --> B[服务单元配置]
    A --> C[定时器单元配置]
    A --> D[路径监控单元配置]
    A --> E[套接字单元配置]
    B --> B1[Service类型选择]
    B --> B2[启动依赖管理]
    B --> B3[资源限制配置]
    B --> B4[环境变量配置]
    C --> C1[日历定时器]
    C --> C2[单调定时器]
    C --> C3[随机延迟]
    C --> C4[精度控制]
    D --> D1[目录监控]
    D --> D2[文件监控]
    D --> D3[路径变化触发]
    E --> E1[网络套接字]
    E --> E2[Unix域套接字]
    E --> E3[套接字激活]
    B1 --> B1a[simple]
    B1 --> B1b[forking]
    B1 --> B1c[oneshot]
    B1 --> B1d[notify]
    B1 --> B1e[idle]
    style A fill:#2c3e50,stroke:#3498db,stroke-width:3px,color:#ffffff
    style B fill:#34495e,stroke:#2980b9,stroke-width:2px,color:#ffffff
    style C fill:#34495e,stroke:#2980b9,stroke-width:2px,color:#ffffff
    style D fill:#34495e,stroke:#2980b9,stroke-width:2px,color:#ffffff
    style E fill:#34495e,stroke:#2980b9,stroke-width:2px,color:#ffffff
    style B1 fill:#16a085,stroke:#27ae60,color:#ffffff
    style C1 fill:#16a085,stroke:#27ae60,color:#ffffff
    style D1 fill:#16a085,stroke:#27ae60,color:#ffffff
    style E1 fill:#16a085,stroke:#27ae60,color:#ffffff

1. systemd 基础概念解析

1.1 什么是 systemd

systemd 是现代 Linux 系统的初始化系统和服务管理器,它取代了传统的 SysV init 系统。systemd 不仅负责启动系统服务,还提供了强大的服务管理、依赖解析、日志记录和资源控制功能。

1.2 systemd 核心组件

  • systemctl: 服务管理的主要命令行工具
  • journalctl: 系统日志查看工具
  • 单元文件 (Unit Files): 服务配置的核心文件
  • 目标 (Targets): 类似运行级别的概念

1.3 单元类型详解

bash 复制代码
# List every unit type this systemd build supports
systemctl -t help

2. 创建自定义应用服务

2.1 示例应用:Python Web 服务

创建应用文件:/opt/myapp/app.py

python 复制代码
#!/usr/bin/env python3
"""
自定义 Python Web 应用示例
用于演示 systemd 服务管理
"""

import argparse
import http.server
import json
import logging
import os
import signal
import socketserver
import sys
import threading
import time
from datetime import datetime

# Logging setup: every record is written both to the application log file
# and to stdout, using one shared record layout.
logging.basicConfig(
    handlers=[
        logging.FileHandler('/var/log/myapp/app.log'),
        logging.StreamHandler(sys.stdout),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('myapp')

class HealthCheckHandler(http.server.SimpleHTTPRequestHandler):
    """Request handler exposing ``/health`` and ``/metrics``.

    Any other path is answered with an empty 404 response.
    """

    def do_GET(self):
        """Answer a GET request according to ``self.path``.

        ``/health``  -> 200, JSON object with status, timestamp, pid, uptime.
        ``/metrics`` -> 200, plain-text counters in Prometheus exposition style.
        otherwise    -> 404 with no body.

        Reads the module-level ``start_time`` and, for ``/metrics``, the
        ``request_count`` attribute of the owning server.
        """
        if self.path == '/health':
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            response = {
                'status': 'healthy',
                'timestamp': datetime.now().isoformat(),
                'pid': os.getpid(),
                'uptime': time.time() - start_time
            }
            # json.dumps, not str(): str() of a dict is a Python repr with
            # single quotes, which JSON parsers reject even though the
            # response is labelled application/json.
            self.wfile.write(json.dumps(response).encode())
        elif self.path == '/metrics':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            metrics = f"""# HELP myapp_requests_total Total requests served
# TYPE myapp_requests_total counter
myapp_requests_total {self.server.request_count}

# HELP myapp_uptime_seconds Application uptime in seconds
# TYPE myapp_uptime_seconds gauge
myapp_uptime_seconds {time.time() - start_time}
"""
            self.wfile.write(metrics.encode())
        else:
            self.send_response(404)
            self.end_headers()

class MyHTTPServer(socketserver.TCPServer):
    """TCP server that counts and logs every accepted request."""

    # Must be a class attribute: TCPServer.__init__ binds the socket and
    # consults allow_reuse_address while doing so, so assigning it on the
    # instance after super().__init__() comes too late to have any effect
    # (a quick restart could then fail with "Address already in use").
    allow_reuse_address = True

    def __init__(self, server_address, RequestHandlerClass):
        """Bind to ``server_address`` and start the request counter at zero."""
        super().__init__(server_address, RequestHandlerClass)
        self.request_count = 0

    def process_request(self, request, client_address):
        """Count and log the request, then let TCPServer handle it."""
        self.request_count += 1
        logger.info(f"处理请求 #{self.request_count} 来自 {client_address}")
        super().process_request(request, client_address)

class Application:
    """Owns the HTTP server's lifecycle: start-up, signal handling, shutdown."""

    def __init__(self, host='localhost', port=8080):
        """Remember the bind address; the server is created in start()."""
        self.host = host
        self.port = port
        self.server = None
        self.is_running = False

    def start(self):
        """Create the server, install signal handlers and serve requests.

        Blocks inside ``serve_forever()`` and returns once a shutdown has
        been requested via stop(). Any exception raised while starting or
        serving is logged and the process exits with status 1.
        """
        try:
            logger.info(f"启动应用服务在 {self.host}:{self.port}")
            self.server = MyHTTPServer((self.host, self.port), HealthCheckHandler)
            self.is_running = True

            # Graceful shutdown on SIGTERM (systemd stop) and SIGINT (Ctrl+C).
            signal.signal(signal.SIGTERM, self.signal_handler)
            signal.signal(signal.SIGINT, self.signal_handler)
            # The unit files send SIGHUP on reload; its default action
            # terminates the process, so ignore it explicitly.
            if hasattr(signal, 'SIGHUP'):
                signal.signal(signal.SIGHUP, signal.SIG_IGN)

            logger.info("应用服务启动完成")
            self.server.serve_forever()

        except Exception as e:
            logger.error(f"启动服务失败: {e}")
            sys.exit(1)

    def stop(self):
        """Shut the server down and release its socket.

        Must be called from a thread other than the one running
        ``serve_forever()``: ``shutdown()`` blocks until that loop exits.
        Does nothing when no server has been created.
        """
        if self.server:
            logger.info("正在停止应用服务...")
            self.server.shutdown()
            self.server.server_close()
            self.is_running = False
            logger.info("应用服务已停止")

    def signal_handler(self, signum, frame):
        """Handle SIGTERM/SIGINT by requesting an orderly shutdown.

        Signal handlers run on the main thread, which is the thread blocked
        in ``serve_forever()``; calling ``shutdown()`` directly from here
        would wait forever for a loop that cannot make progress. The stop
        is therefore delegated to a helper thread. Once the loop exits,
        start() returns and the process ends with status 0.
        """
        logger.info(f"接收到信号 {signum}, 正在关闭服务...")
        # Non-daemon so the interpreter waits for stop() to finish logging
        # and closing the socket before exiting.
        threading.Thread(target=self.stop, name='myapp-shutdown').start()

# Module-level state: process start timestamp (used for uptime reporting)
# and the running Application instance.
start_time = time.time()
app = None


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``host`` (default ``'0.0.0.0'``) and ``port``
        (int, default ``8080``).
    """
    parser = argparse.ArgumentParser(description='MyApp web service')
    parser.add_argument('--host', default='0.0.0.0')
    # The templated unit starts the app with "--port <n>"; without this
    # option every instance would try to bind 8080.
    parser.add_argument('--port', type=int, default=8080)
    return parser.parse_args(argv)


def main():
    """Build the Application from the command line and run it."""
    global app
    options = parse_args()
    app = Application(host=options.host, port=options.port)
    app.start()


if __name__ == '__main__':
    main()

2.2 基础服务单元配置

创建服务文件:/etc/systemd/system/myapp.service

ini 复制代码
[Unit]
Description=MyApp Custom Python Web Service
Documentation=https://example.com/docs/myapp
After=network.target network-online.target
Wants=network-online.target
# No dependency on syslog.target: that target is obsolete, and a hard
# Requires= on a unit that cannot be loaded keeps this service from starting.
# Logging goes to the journal (see StandardOutput= below).

# Restart rate limit: at most 10 starts within 100 seconds. These settings
# live in [Unit] under their current names.
StartLimitIntervalSec=100
StartLimitBurst=10

[Service]
Type=simple
User=myapp
Group=myapp
# Directories created and owned for the service by systemd:
# /run/myapp, /var/lib/myapp and /var/log/myapp.
RuntimeDirectory=myapp
RuntimeDirectoryMode=0755
StateDirectory=myapp
StateDirectoryMode=0750
LogsDirectory=myapp
LogsDirectoryMode=0750

# Working directory and commands
WorkingDirectory=/opt/myapp
ExecStart=/usr/bin/python3 /opt/myapp/app.py
# NOTE(review): reload delivers SIGHUP to the main process. The application
# must handle or ignore SIGHUP, otherwise the signal's default action
# terminates it.
ExecReload=/bin/kill -HUP $MAINPID
ExecStop=/bin/kill -TERM $MAINPID

# Process management: always restart, 5 seconds after exit
Restart=always
RestartSec=5

# Security hardening
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
# The leading "-" makes a missing data directory non-fatal; without it the
# unit fails during namespace setup when the path does not exist.
ReadWritePaths=/var/log/myapp -/opt/myapp/data
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes

# Resource limits (MemoryMax= replaces the deprecated MemoryLimit=)
MemoryMax=512M
CPUQuota=150%
LimitNOFILE=65536
LimitNPROC=4096

# Environment
Environment="PYTHONPATH=/opt/myapp"
Environment="APP_ENV=production"
Environment="LOG_LEVEL=INFO"

# stdout/stderr go to the journal, tagged "myapp"
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp

[Install]
WantedBy=multi-user.target
# No Alias=: an alias equal to the unit's own name adds nothing.

2.3 服务管理脚本

创建管理脚本:/usr/local/bin/myapp-manager.sh

bash 复制代码
#!/bin/bash
#
# MyApp service manager: a thin wrapper around systemctl/journalctl that
# bundles the common service management tasks.

# Abort on the first unhandled command failure.
set -e

# Service identity and filesystem layout
SERVICE_NAME="myapp.service"
APP_USER="myapp"
APP_DIR="/opt/myapp"
DATA_DIR="/opt/myapp/data"
LOG_DIR="/var/log/myapp"

# ANSI colour escapes, expanded later by `echo -e`
NC='\033[0m' # reset / no colour
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'

# Logging helpers: print "<coloured [LEVEL]> <timestamp> - <message>" to stdout.
# _myapp_log <colour> <label> <message> does the formatting for all three.
_myapp_log() {
    local colour=$1 label=$2 message=$3
    echo -e "${colour}[${label}]${NC} $(date '+%Y-%m-%d %H:%M:%S') - ${message}"
}

log_info() {
    _myapp_log "$GREEN" "INFO" "$1"
}

log_warn() {
    _myapp_log "$YELLOW" "WARN" "$1"
}

log_error() {
    _myapp_log "$RED" "ERROR" "$1"
}

# Report whether the unit is active.
# Returns 0 (and logs at INFO) when active, 1 (and logs at ERROR) otherwise.
check_service_status() {
    log_info "检查服务状态..."
    if systemctl is-active --quiet "$SERVICE_NAME"; then
        log_info "服务正在运行"
        return 0
    else
        log_error "服务未运行"
        return 1
    fi
}

# Start the unit and wait up to 10 seconds for it to become active.
# Returns 0 once active, 1 on timeout.
start_service() {
    log_info "启动 $SERVICE_NAME ..."
    sudo systemctl start "$SERVICE_NAME"

    local count=0
    while [ "$count" -lt 10 ]; do
        if systemctl is-active --quiet "$SERVICE_NAME"; then
            log_info "服务启动成功"
            return 0
        fi
        sleep 1
        # Not ((count++)): that expression evaluates to 0 on the first pass,
        # which yields exit status 1 and makes `set -e` abort the script.
        count=$((count + 1))
    done

    log_error "服务启动超时"
    return 1
}

# Stop the unit and wait up to 10 seconds for it to become inactive.
# Returns 0 once stopped, 1 on timeout.
stop_service() {
    log_info "停止 $SERVICE_NAME ..."
    sudo systemctl stop "$SERVICE_NAME"

    local count=0
    while [ "$count" -lt 10 ]; do
        if ! systemctl is-active --quiet "$SERVICE_NAME"; then
            log_info "服务停止成功"
            return 0
        fi
        sleep 1
        # Not ((count++)): that expression evaluates to 0 on the first pass,
        # which yields exit status 1 and makes `set -e` abort the script.
        count=$((count + 1))
    done

    log_error "服务停止超时"
    return 1
}

# Restart the unit, wait two seconds, then report whether it is active
# (the return status is that of check_service_status).
restart_service() {
    log_info "重启 $SERVICE_NAME ..."
    sudo systemctl restart "$SERVICE_NAME"
    sleep 2
    check_service_status
}

# Make systemd re-read its unit files and clear the unit's failed state.
# This reloads systemd's configuration; it does not signal the application.
reload_service() {
    log_info "重载服务配置..."
    sudo systemctl daemon-reload
    sudo systemctl reset-failed "$SERVICE_NAME"
    log_info "服务配置重载完成"
}

# Show the unit's last N journal lines and keep following new entries.
#   $1 - number of lines to show (default 50); must be a non-negative integer.
# Returns 1 without calling journalctl when $1 is not numeric.
show_service_logs() {
    local lines=${1:-50}

    # The value reaches `sudo journalctl`; reject anything that is not a
    # plain number so it cannot be split into extra options.
    if ! [[ "$lines" =~ ^[0-9]+$ ]]; then
        log_error "无效的日志行数: $lines"
        return 1
    fi

    log_info "显示服务最后 ${lines} 行日志:"
    sudo journalctl -u "$SERVICE_NAME" -n "$lines" -f
}

# Print the full, unpaged `systemctl status` output for the unit.
show_service_status() {
    log_info "服务状态详情:"
    sudo systemctl status "$SERVICE_NAME" -l --no-pager
}

# Enable the unit so it is started at boot.
enable_service() {
    log_info "启用开机自启..."
    sudo systemctl enable "$SERVICE_NAME"
    log_info "开机自启已启用"
}

# Disable the unit so it is no longer started at boot.
disable_service() {
    log_info "禁用开机自启..."
    sudo systemctl disable "$SERVICE_NAME"
    log_info "开机自启已禁用"
}

# Verify the prerequisites: python3 on PATH, the application directory and
# the application user. Returns 1 at the first missing item, 0 otherwise.
check_dependencies() {
    log_info "检查系统依赖..."

    command -v python3 &> /dev/null || {
        log_error "Python3 未安装"
        return 1
    }

    [ -d "$APP_DIR" ] || {
        log_error "应用目录不存在: $APP_DIR"
        return 1
    }

    id "$APP_USER" &> /dev/null || {
        log_error "应用用户不存在: $APP_USER"
        return 1
    }

    log_info "所有依赖检查通过"
    return 0
}

# Create the application user (if missing) and the application, log and data
# directories, then hand ownership of all three to that user.
setup_environment() {
    log_info "设置应用环境..."

    local -a managed_dirs=("$APP_DIR" "$LOG_DIR" "$DATA_DIR")

    # System account without a login shell, home set to the app directory.
    if ! id "$APP_USER" &> /dev/null; then
        log_info "创建应用用户: $APP_USER"
        sudo useradd -r -s /bin/false -d "$APP_DIR" "$APP_USER"
    fi

    sudo mkdir -p "${managed_dirs[@]}"
    sudo chown -R "$APP_USER:$APP_USER" "${managed_dirs[@]}"

    # App directory is world-readable; logs and data are owner/group only.
    sudo chmod 755 "$APP_DIR"
    sudo chmod 750 "$LOG_DIR" "$DATA_DIR"

    log_info "环境设置完成"
}

# Archive the application, log and data directories plus the unit file into
# /var/backups/myapp/myapp_backup_<timestamp>.tar.gz.
# Paths that do not exist are skipped with a warning. Returns 1 when there is
# nothing to archive or when tar reports a fatal error, 0 otherwise.
backup_service() {
    local backup_dir="/var/backups/myapp"
    local unit_file="/etc/systemd/system/$SERVICE_NAME"
    local timestamp backup_file
    timestamp=$(date '+%Y%m%d_%H%M%S')
    backup_file="myapp_backup_${timestamp}.tar.gz"

    log_info "开始备份服务数据..."

    # Collect only existing paths, stored relative to "/" so tar does not
    # have to strip (and warn about) leading slashes.
    local -a members=()
    local path
    for path in "$APP_DIR" "$LOG_DIR" "$DATA_DIR" "$unit_file"; do
        if sudo test -e "$path"; then
            members+=("${path#/}")
        else
            log_warn "跳过不存在的路径: $path"
        fi
    done

    if [ "${#members[@]}" -eq 0 ]; then
        log_error "没有可备份的内容"
        return 1
    fi

    sudo mkdir -p "$backup_dir"

    # Capture tar's status instead of discarding it with "|| true", so a
    # failed backup is never reported as complete.
    local status=0
    sudo tar -czf "$backup_dir/$backup_file" -C "/" "${members[@]}" || status=$?

    # GNU tar: 1 = some files changed while being read (expected for live
    # logs, archive still usable); 2 and above = fatal error.
    if [ "$status" -gt 1 ]; then
        log_error "备份失败 (tar 退出码: $status)"
        sudo rm -f "$backup_dir/$backup_file"
        return 1
    fi
    if [ "$status" -eq 1 ]; then
        log_warn "备份期间部分文件发生变化"
    fi

    sudo chown root:root "$backup_dir/$backup_file"
    log_info "备份完成: $backup_dir/$backup_file"
}

# Print the command overview. %b expands the colour escapes the same way
# `echo -e` does; %s prints text verbatim.
show_usage() {
    printf '%b\n' "${CYAN}MyApp 服务管理器${NC}"
    printf '%s\n' "" "使用方法: $0 [命令]" "" "可用命令:"
    printf '%b\n' \
        "  ${GREEN}start${NC}       启动服务" \
        "  ${GREEN}stop${NC}        停止服务" \
        "  ${GREEN}restart${NC}     重启服务" \
        "  ${GREEN}status${NC}      查看服务状态" \
        "  ${GREEN}logs${NC}        查看服务日志" \
        "  ${GREEN}enable${NC}      启用开机自启" \
        "  ${GREEN}disable${NC}     禁用开机自启" \
        "  ${GREEN}reload${NC}      重载服务配置" \
        "  ${GREEN}setup${NC}       设置应用环境" \
        "  ${GREEN}backup${NC}      备份服务数据" \
        "  ${GREEN}check${NC}       检查服务依赖" \
        "  ${GREEN}monitor${NC}     实时监控服务"
    printf '\n'
}

# Refresh the unit's status output every 2 seconds until interrupted.
monitor_service() {
    local refresh_cmd="systemctl status $SERVICE_NAME --no-pager"
    log_info "启动实时监控 (Ctrl+C 退出)"
    watch -n 2 "$refresh_cmd"
}

# Dispatch the sub-command given as $1 (default: status).
# "logs" forwards $2 as the line count. Unknown commands print the usage
# text and exit with status 1.
main() {
    local action=${1:-"status"}

    case "$action" in
        start)   start_service ;;
        stop)    stop_service ;;
        restart) restart_service ;;
        status)  show_service_status ;;
        logs)    show_service_logs "$2" ;;
        enable)  enable_service ;;
        disable) disable_service ;;
        reload)  reload_service ;;
        setup)   setup_environment ;;
        backup)  backup_service ;;
        check)   check_dependencies ;;
        monitor) monitor_service ;;
        help|--help|-h)
            show_usage
            ;;
        *)
            log_error "未知命令: $action"
            show_usage
            exit 1
            ;;
    esac
}

# Run main only when executed directly, not when sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi

3. 高级服务配置模式

3.1 多实例服务配置

创建模板服务文件:/etc/systemd/system/myapp@.service

ini 复制代码
[Unit]
Description=MyApp Instance %i
Documentation=https://example.com/docs/myapp
# Wants= only pulls network-online.target in; After= is what delays this
# unit until the target has been reached.
After=network.target network-online.target
Wants=network-online.target
# Restart rate limit: at most 5 starts within 100 seconds ([Unit] setting).
StartLimitIntervalSec=100
StartLimitBurst=5

[Service]
Type=simple
User=myapp
Group=myapp

# Per-instance directories. The shared "myapp" logs directory is listed too
# because app.py opens /var/log/myapp/app.log unconditionally and
# ProtectSystem=strict would otherwise leave that path read-only.
RuntimeDirectory=myapp-%i
StateDirectory=myapp-%i
LogsDirectory=myapp myapp-%i

# Port derived from the instance name: myapp@0 -> 8080, myapp@1 -> 8180, ...
# (single-digit instance names only).
Environment=INSTANCE_PORT=8%i80
WorkingDirectory=/opt/myapp
# NOTE(review): the application must accept a --port option for this to work.
ExecStart=/usr/bin/python3 /opt/myapp/app.py --port ${INSTANCE_PORT}
# NOTE(review): the application must handle or ignore SIGHUP, otherwise the
# signal's default action terminates it.
ExecReload=/bin/kill -HUP $MAINPID

# Process management
Restart=always
RestartSec=5

# Security hardening
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
# Leading "-": a missing per-instance data directory is not fatal.
ReadWritePaths=/var/log/myapp-%i -/opt/myapp/data-%i

# Resource limits (MemoryMax= replaces the deprecated MemoryLimit=)
MemoryMax=256M
CPUQuota=100%
LimitNOFILE=32768

# Environment
Environment="APP_ENV=production"
Environment="INSTANCE_NAME=%i"
Environment="LOG_LEVEL=INFO"

StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-%i

[Install]
WantedBy=multi-user.target

3.2 依赖关系管理

创建依赖服务文件:/etc/systemd/system/myapp-dependencies.service

ini 复制代码
[Unit]
Description=MyApp Dependencies Pre-start
# Pull in the backing services and order this unit after them, so systemd
# itself sequences the start-up; the polling loops below are a second check.
Wants=postgresql.service redis-server.service
After=network.target postgresql.service redis-server.service
Before=myapp.service

[Service]
Type=oneshot
RemainAfterExit=yes
User=root
Group=root

# Type=oneshot has no start timeout by default. Without a bound, the wait
# loops below would block this unit (and myapp.service) forever if a
# dependency never becomes active.
TimeoutStartSec=120

# Wait until the dependencies report active
ExecStartPre=/bin/bash -c 'until systemctl is-active --quiet postgresql.service; do sleep 1; done'
ExecStartPre=/bin/bash -c 'until systemctl is-active --quiet redis-server.service; do sleep 1; done'

# Prepare the log and data directories
ExecStart=/usr/bin/mkdir -p /var/log/myapp /opt/myapp/data
ExecStart=/usr/bin/chown myapp:myapp /var/log/myapp /opt/myapp/data
ExecStart=/usr/bin/chmod 750 /var/log/myapp /opt/myapp/data

# Best-effort probe of the PostgreSQL port. PostgreSQL does not speak HTTP,
# so a plain TCP connect replaces the former curl call. The leading "-"
# records a failure without failing the unit.
ExecStartPost=-/bin/bash -c 'exec 3<>/dev/tcp/127.0.0.1/5432'
ExecStartPost=/bin/sleep 5

[Install]
RequiredBy=myapp.service

4. 定时器服务配置

4.1 备份定时器

创建备份服务文件:/etc/systemd/system/myapp-backup.service

ini 复制代码
[Unit]
Description=MyApp Data Backup Service
Documentation=https://example.com/docs/myapp/backup
After=network.target
Requires=myapp.service

[Service]
Type=oneshot
User=backup
Group=backup

# Environment
Environment=BACKUP_DIR=/var/backups/myapp
Environment=RETENTION_DAYS=30

# Run the backup, then prune archives older than RETENTION_DAYS days
ExecStart=/usr/local/bin/myapp-backup.sh
ExecStartPost=/usr/bin/find ${BACKUP_DIR} -name "myapp_backup_*.tar.gz" -mtime +${RETENTION_DAYS} -delete

# Output goes to the journal, tagged "myapp-backup"
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-backup

# Only exit status 0 counts as success. Accepting 1 as well would report a
# failed backup as a successful run.

# No [Install] section: this unit is started by myapp-backup.timer.
# WantedBy=multi-user.target would additionally run a backup on every boot.

创建备份定时器:/etc/systemd/system/myapp-backup.timer

ini 复制代码
[Unit]
Description=MyApp Daily Backup Timer
Documentation=https://example.com/docs/myapp/backup
# No Requires=myapp-backup.service: that would start the backup as soon as
# the timer unit itself is started (for example at boot), not only at the
# scheduled time. The timer activates the service through Unit= below.

[Timer]
# Run every day at 02:00
OnCalendar=*-*-* 02:00:00

# Add a random delay of 0-30 minutes so several hosts do not back up at once
RandomizedDelaySec=1800

# If a run was missed (machine off), trigger it on the next start
Persistent=true

# Allowed scheduling slack
AccuracySec=1h

# Unit to activate when the timer elapses
Unit=myapp-backup.service

[Install]
WantedBy=timers.target

4.2 健康检查定时器

创建健康检查服务:/etc/systemd/system/myapp-healthcheck.service

ini 复制代码
[Unit]
Description=MyApp Health Check Service
After=network.target myapp.service

[Service]
Type=oneshot
# Runs as root (no User=/Group=): restarting another unit with systemctl is
# normally not permitted for an unprivileged service account.

# Probe the health endpoint and restart the service when the probe fails.
# Both steps share one shell command: ExecStartPost= only runs after
# ExecStart= has succeeded, and "$?" in a separate command never carries the
# previous command's exit status.
ExecStart=/bin/bash -c '/usr/bin/curl -f -s --max-time 10 http://localhost:8080/health || /usr/bin/systemctl restart myapp.service'

# Upper bound for the whole check
TimeoutStartSec=30

StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-healthcheck

创建健康检查定时器:/etc/systemd/system/myapp-healthcheck.timer

ini 复制代码
[Unit]
Description=MyApp Health Check Timer
# No Requires=myapp-healthcheck.service: that would run the check as soon as
# the timer unit is started. The timer activates the service of the same
# name on its own schedule.

[Timer]
# First run 5 minutes after boot, then 5 minutes after each previous run
OnBootSec=5min
OnUnitActiveSec=5min

# Allowed scheduling slack
AccuracySec=1m

[Install]
WantedBy=timers.target

5. 路径和套接字激活

5.1 文件变化监控服务

创建路径监控单元:/etc/systemd/system/myapp-config-watcher.path

ini 复制代码
[Unit]
Description=MyApp Configuration File Watcher
Documentation=https://example.com/docs/myapp/config
After=myapp.service

[Path]
# Trigger when a configuration file has been modified and closed
PathChanged=/etc/myapp/config.yaml
PathChanged=/opt/myapp/settings.ini

# Trigger while the restart flag exists. The reload service removes the
# flag, which clears this condition again.
PathExists=/opt/myapp/restart.flag

# NOTE: the former DirectoryNotEmpty=/opt/myapp/queue watch was dropped.
# The triggered service never empties that directory, so the condition
# would stay true and re-trigger the service until the start limit is hit.

# MakeDirectory= stays at its default (no): enabling it creates a directory
# at every watched path except PathExists= ones, which would turn the
# configuration file paths above into directories if they are missing.
Unit=myapp-config-reload.service

[Install]
WantedBy=multi-user.target

创建配置重载服务:/etc/systemd/system/myapp-config-reload.service

ini 复制代码
[Unit]
Description=MyApp Configuration Reload
Documentation=https://example.com/docs/myapp/config

[Service]
Type=oneshot
# Runs as root (no User=/Group=): asking systemd to reload another unit is
# normally not permitted for an unprivileged service account.

# Remove the trigger flag first, so a failing reload cannot leave the flag
# behind and re-trigger this unit in a loop.
ExecStart=/bin/rm -f /opt/myapp/restart.flag
# $MAINPID is only defined inside the unit that owns the process; the former
# Environment=MAINPID=<pid file path> passed a file name to kill. Let
# systemd run myapp.service's own ExecReload= instead.
ExecStart=/usr/bin/systemctl reload myapp.service

StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-config-reload

5.2 套接字激活服务

创建套接字单元:/etc/systemd/system/myapp.socket

ini 复制代码
[Unit]
Description=MyApp Socket Activation
Documentation=https://example.com/docs/myapp/socket
Before=myapp.service

[Socket]
# Listening addresses (IPv4 and IPv6)
ListenStream=0.0.0.0:8080
ListenStream=[::]:8080

# Accept=no: the listening socket is handed to the single myapp.service
# named in Service= below. Accept=yes spawns one templated service instance
# per connection and cannot be combined with Service=.
Accept=no

# Buffer sizes and TCP options
ReceiveBuffer=8M
SendBuffer=8M
KeepAlive=yes
NoDelay=yes
Backlog=4096

# SocketUser=, SocketGroup=, SocketMode= and RemoveOnStop= were dropped:
# they only apply to sockets and FIFOs in the file system, not to TCP.

# NOTE(review): the application must use the socket passed by systemd
# instead of binding port 8080 itself, otherwise its own bind fails because
# this unit already holds the port.
Service=myapp.service

[Install]
WantedBy=sockets.target
Also=myapp.service

6. 高级资源管理和安全配置

6.1 资源限制服务配置

创建资源限制服务:/etc/systemd/system/myapp-with-limits.service

ini 复制代码
[Unit]
Description=MyApp with Resource Limits
Documentation=https://example.com/docs/myapp/resources
After=network.target

[Service]
Type=simple
User=myapp
Group=myapp

# Basics
WorkingDirectory=/opt/myapp
ExecStart=/usr/bin/python3 /opt/myapp/app.py
Restart=always

# Have systemd create /var/log/myapp owned by the service user. The
# ReadWritePaths= entry below fails the unit at start when the directory is
# missing.
LogsDirectory=myapp
LogsDirectoryMode=0750

# CPU limits: up to two full CPUs, pinned to cores 0-3, with a higher
# scheduling weight while the system is still booting
CPUQuota=200%
CPUWeight=100
CPUAffinity=0-3
StartupCPUWeight=500

# Memory limits: throttled above MemoryHigh, hard cap at MemoryMax
MemoryMax=1G
MemoryHigh=800M
MemorySwapMax=500M

# Block IO limits
# NOTE(review): /dev/sda is host specific; adjust it to the actual device.
IOWeight=100
StartupIOWeight=500
IODeviceWeight=/dev/sda 200
IOReadBandwidthMax=/dev/sda 50M
IOWriteBandwidthMax=/dev/sda 50M

# Maximum number of tasks (processes and threads)
TasksMax=2000

# Security hardening
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
NoNewPrivileges=yes
PrivateTmp=yes
PrivateDevices=yes
PrivateUsers=yes
ProtectSystem=strict
ProtectHome=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
SystemCallFilter=@system-service
SystemCallArchitectures=native
LockPersonality=yes

# File system access: the application tree is read-only apart from its data
# directory. A leading "-" means the path is ignored when it does not exist.
ReadWritePaths=/var/log/myapp -/opt/myapp/data
ReadOnlyPaths=/opt/myapp
InaccessiblePaths=-/etc/secrets

# Further sandboxing
ProtectHostname=yes
ProtectClock=yes
ProtectKernelLogs=yes
ProtectProc=invisible
ProcSubset=pid

# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myapp-limited

[Install]
WantedBy=multi-user.target

6.2 系统资源监控脚本

创建资源监控脚本:/usr/local/bin/myapp-resource-monitor.sh

bash 复制代码
#!/bin/bash
#
# MyApp resource monitor: watches the service's resource usage and adjusts
# its systemd limits automatically.

# Abort on the first unhandled command failure.
set -e

# Monitored unit and log destination
SERVICE_NAME="myapp.service"
LOG_FILE="/var/log/myapp/resource-monitor.log"

# Alert thresholds, in percent
ALERT_THRESHOLD_CPU=90
ALERT_THRESHOLD_MEMORY=85
ALERT_THRESHOLD_DISK=80

# ANSI colour escapes, expanded later by `echo -e`
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Append a timestamped line to LOG_FILE and echo the message to stdout with
# a coloured [MONITOR] prefix.
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s\n' "${stamp} - $1" >> "$LOG_FILE"
    echo -e "${GREEN}[MONITOR]${NC} $1"
}

# Record an alert through log() and print it with a coloured [ALERT] prefix.
alert() {
    log "ALERT: $1"
    # Hook point: mail, Slack or other notifications could be added here.
    echo -e "${RED}[ALERT]${NC} $1"
}

# Print "<%cpu> <%mem> <rss KB>" for the unit's main process, as reported
# by ps. Prints a notice and returns 1 when the unit has no main PID.
get_service_resources() {
    local main_pid=$(systemctl show --property MainPID "$SERVICE_NAME" | cut -d= -f2)

    # systemd reports MainPID=0 when the service is not running.
    if [ "$main_pid" -eq 0 ]; then
        echo "服务未运行"
        return 1
    fi

    # Each figure falls back to "0" when ps cannot read the process.
    local cpu_pct=$(ps -p "$main_pid" -o %cpu --no-headers 2>/dev/null || echo "0")
    local mem_pct=$(ps -p "$main_pid" -o %mem --no-headers 2>/dev/null || echo "0")
    local rss_kb=$(ps -p "$main_pid" -o rss --no-headers 2>/dev/null || echo "0")

    echo "$cpu_pct $mem_pct $rss_kb"
}

# Print "<cpu %> <memory %> <root disk %>" for the whole system.
check_system_resources() {
    # Sum of fields 2 and 4 of top's "Cpu(s)" summary line
    local sys_cpu=$(top -bn1 | awk '/Cpu\(s\)/ {print $2 + $4}')

    # used / total from the second line of `free`, two decimals
    local sys_mem=$(free | awk 'NR==2{printf "%.2f", $3*100/$2}')

    # Use% column of the root file system, without the percent sign
    local sys_disk=$(df / | awk 'NR==2{sub(/%/, "", $5); print $5}')

    echo "$sys_cpu $sys_mem $sys_disk"
}

# Raise the unit's limits when its usage exceeds the alert thresholds.
#   $1 - CPU usage in percent
#   $2 - memory usage in percent
#   $3 - resident memory in KB (logged only)
# Memory above ALERT_THRESHOLD_MEMORY: MemoryMax is raised by 20%.
# CPU above ALERT_THRESHOLD_CPU: CPUWeight is raised by 10%, capped at 1000.
adjust_resource_limits() {
    local cpu_usage=${1:-0}
    local memory_usage=${2:-0}
    local memory_kb=${3:-0}

    log "当前资源使用: CPU=${cpu_usage}%, 内存=${memory_usage}%, 内存大小=${memory_kb}KB"

    if [ "$(echo "$memory_usage > $ALERT_THRESHOLD_MEMORY" | bc)" -eq 1 ]; then
        local current_limit
        current_limit=$(systemctl show --property MemoryMax "$SERVICE_NAME" | cut -d= -f2)

        if [[ "$current_limit" =~ ^[0-9]+$ ]]; then
            local new_limit=$((current_limit * 120 / 100))  # +20%

            log "内存使用过高,调整内存限制从 ${current_limit} 到 ${new_limit}"
            # systemctl reports MemoryMax in bytes, so the new value is
            # passed in bytes as well. Appending "M" would multiply the
            # limit by 2^20.
            systemctl set-property "$SERVICE_NAME" MemoryMax="${new_limit}"

            alert "内存使用率 ${memory_usage}% 超过阈值 ${ALERT_THRESHOLD_MEMORY}%,已自动调整限制"
        else
            # Non-numeric value such as "infinity": there is no cap to raise.
            alert "内存使用率 ${memory_usage}% 超过阈值 ${ALERT_THRESHOLD_MEMORY}%,未设置内存上限,跳过调整"
        fi
    fi

    if [ "$(echo "$cpu_usage > $ALERT_THRESHOLD_CPU" | bc)" -eq 1 ]; then
        local current_weight
        current_weight=$(systemctl show --property CPUWeight "$SERVICE_NAME" | cut -d= -f2)

        # An unset weight is reported as "[not set]"; use systemd's default
        # of 100 so the arithmetic below cannot fail.
        if ! [[ "$current_weight" =~ ^[0-9]+$ ]]; then
            current_weight=100
        fi

        local new_weight=$((current_weight * 110 / 100))  # +10%

        if [ "$new_weight" -gt 1000 ]; then
            new_weight=1000
        fi

        log "CPU使用过高,调整CPU权重从 ${current_weight} 到 ${new_weight}"
        systemctl set-property "$SERVICE_NAME" CPUWeight="$new_weight"

        alert "CPU使用率 ${cpu_usage}% 超过阈值 ${ALERT_THRESHOLD_CPU}%,已调整CPU权重"
    fi
}

# Check that the unit is active and that its /health endpoint answers.
# On either failure an alert is raised, the unit is restarted and 1 is
# returned; returns 0 when both checks pass.
check_service_health() {
    local failure=""

    if ! systemctl is-active --quiet "$SERVICE_NAME"; then
        failure="服务 $SERVICE_NAME 未运行,尝试重启..."
    elif ! curl -f -s http://localhost:8080/health > /dev/null; then
        failure="服务健康检查失败,尝试重启..."
    fi

    if [ -z "$failure" ]; then
        return 0
    fi

    alert "$failure"
    systemctl restart "$SERVICE_NAME"
    return 1
}

# Write a snapshot report (service state, system usage, the unit's non-zero
# Memory/CPU/Tasks properties and the last 10 journal lines of the past
# hour) to /var/log/myapp/resource-report-<YYYYMMDD>.log, replacing any
# report already written that day.
generate_resource_report() {
    local target="/var/log/myapp/resource-report-$(date +%Y%m%d).log"

    {
        printf '%s\n' "=== MyApp 资源使用报告 ==="
        printf '%s\n' "生成时间: $(date)"
        printf '%s\n' "服务状态: $(systemctl is-active "$SERVICE_NAME")"
        printf '\n'
        printf '%s\n' "系统资源:"
        printf '%s\n' "- CPU使用率: $(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}')%"
        printf '%s\n' "- 内存使用率: $(free | awk 'NR==2{printf "%.2f", $3*100/$2}')%"
        printf '%s\n' "- 磁盘使用率: $(df / | awk 'NR==2{print $5}')"
        printf '\n'
        printf '%s\n' "服务资源:"
        systemctl show "$SERVICE_NAME" | grep -E "(Memory|CPU|Tasks)" | grep -v "=0"
        printf '\n'
        printf '%s\n' "最近日志:"
        journalctl -u "$SERVICE_NAME" --since="1 hour ago" | tail -10
    } > "$target"

    log "资源报告已生成: $target"
}

# Main loop: once a minute check the service's health, read its resource
# usage and adjust limits. A report is generated when the minute is 00.
# After a failed health check or resource query the loop waits 30 seconds
# and starts over. Never returns.
monitor_loop() {
    log "启动资源监控..."

    local service_resources cpu_usage memory_usage memory_kb

    while true; do
        if ! check_service_health; then
            sleep 30
            continue
        fi

        # Test the command substitution directly. After
        # `local var=$(cmd)`, $? holds the status of `local` (always 0),
        # so a failing query would go unnoticed.
        if ! service_resources=$(get_service_resources); then
            sleep 30
            continue
        fi

        # Split "<cpu> <mem> <rss>" on whitespace.
        read -r cpu_usage memory_usage memory_kb <<< "$service_resources"

        adjust_resource_limits "$cpu_usage" "$memory_usage" "$memory_kb"

        # Hourly report
        if [ "$(date +%M)" == "00" ]; then
            generate_resource_report
        fi

        sleep 60
    done
}

# Print the list of supported sub-commands.
show_usage() {
    printf '%s\n' \
        "MyApp 资源监控脚本" \
        "" \
        "使用方法: $0 [命令]" \
        "" \
        "命令:" \
        "  start     启动监控" \
        "  stop      停止监控" \
        "  status    查看监控状态" \
        "  report    生成资源报告" \
        "  stats     显示当前统计"
}

# Dispatch the sub-command given as $1 (default: start).
#   start   - launch the monitor loop in the background
#   stop    - terminate a running monitor loop
#   status  - report whether a monitor loop is running
#   report  - write a resource report now
#   stats   - print current service and system figures
#   monitor - run the loop in the foreground (used internally by start)
main() {
    local command=${1:-"start"}

    # Matches only the background worker ("<script> monitor"). Matching on
    # the script name alone would also match this very invocation, so
    # "start" would always claim the monitor is running and "stop" would
    # kill the calling script itself.
    local worker_pattern='myapp-resource-monitor(\.sh)? monitor$'

    case $command in
        "start")
            if pgrep -f "$worker_pattern" > /dev/null; then
                echo "监控脚本已经在运行"
                exit 1
            fi

            # Run the loop in the background, detached from the terminal
            nohup "$0" monitor > /dev/null 2>&1 &
            echo "监控脚本已启动 (PID: $!)"
            ;;
        "stop")
            # pkill returns 1 when nothing matched; handle that here so
            # `set -e` does not end the script without any message.
            if pkill -f "$worker_pattern"; then
                echo "监控脚本已停止"
            else
                echo "监控脚本未运行"
            fi
            ;;
        "status")
            if pgrep -f "$worker_pattern" > /dev/null; then
                echo "监控脚本正在运行"
            else
                echo "监控脚本未运行"
            fi
            ;;
        "report")
            generate_resource_report
            ;;
        "stats")
            echo "当前资源使用情况:"
            get_service_resources
            echo "系统资源情况:"
            check_system_resources
            ;;
        "monitor")
            monitor_loop
            ;;
        *)
            show_usage
            exit 1
            ;;
    esac
}

# Run main only when executed directly, not when sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi

7. 部署和维护工具

7.1 自动化部署脚本

创建部署脚本:/usr/local/bin/deploy-myapp.sh

bash 复制代码
#!/bin/bash
#
# MyApp automated deployment script

# Abort on the first unhandled command failure.
set -e

# Configuration
APP_NAME="myapp"
APP_USER="myapp"
APP_DIR="/opt/myapp"
SERVICE_FILE="/etc/systemd/system/myapp.service"
BACKUP_DIR="/var/backups/myapp"
# Version to deploy: first argument, else the DEPLOY_VERSION environment
# variable (which the usage text advertises), else "latest".
DEPLOY_VERSION="${1:-${DEPLOY_VERSION:-latest}}"

# ANSI colour escapes, expanded later by `echo -e`
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Message helpers: print "<coloured [TAG]> <timestamp> - <message>".
# _deploy_say <colour> <tag> <message> does the formatting for all three.
_deploy_say() {
    local colour=$1 tag=$2 text=$3
    echo -e "${colour}[${tag}]${NC} $(date '+%Y-%m-%d %H:%M:%S') - ${text}"
}

log() {
    _deploy_say "$GREEN" "DEPLOY" "$1"
}

# Prints the message and terminates the script with status 1.
error() {
    _deploy_say "$RED" "ERROR" "$1"
    exit 1
}

warn() {
    _deploy_say "$YELLOW" "WARN" "$1"
}

# Abort (through error) unless the script runs with effective UID 0.
check_permissions() {
    if (( EUID != 0 )); then
        error "需要 root 权限运行此脚本"
    fi
}

# Archive the currently installed version (application directory, unit file
# and logs) to BACKUP_DIR/myapp_<timestamp>.tar.gz. Skipped with a warning
# when the application directory does not exist yet. Aborts the deployment
# when tar reports a fatal error, because a later rollback depends on this
# archive.
backup_current_version() {
    log "备份当前版本..."

    local timestamp backup_file
    timestamp=$(date '+%Y%m%d_%H%M%S')
    backup_file="${BACKUP_DIR}/myapp_${timestamp}.tar.gz"

    mkdir -p "$BACKUP_DIR"

    if [ ! -d "$APP_DIR" ]; then
        warn "应用目录不存在,跳过备份"
        return 0
    fi

    # Archive only paths that exist (a first deployment has no unit file
    # yet), stored relative to "/" so they restore with `tar -x -C /`.
    local -a members=()
    local path
    for path in "$APP_DIR" "$SERVICE_FILE" "/var/log/myapp"; do
        if [ -e "$path" ]; then
            members+=("${path#/}")
        fi
    done

    # Capture tar's status instead of discarding it with "|| true".
    local status=0
    tar -czf "$backup_file" -C "/" "${members[@]}" || status=$?

    # GNU tar: 1 = files changed while being read (live logs, archive still
    # usable); 2 and above = fatal error.
    if [ "$status" -gt 1 ]; then
        rm -f "$backup_file"
        error "备份失败 (tar 退出码: $status)"
    fi

    log "备份完成: $backup_file"
}

# Create the application's system account unless it already exists
# (no login shell, home directory set to the application directory).
create_app_user() {
    if id "$APP_USER" &> /dev/null; then
        return 0
    fi

    log "创建应用用户: $APP_USER"
    useradd -r -s /bin/false -d "$APP_DIR" "$APP_USER"
}

# Create the application, log, data and configuration directories, owned by
# the application user with mode 755; the log directory is then tightened
# to 750.
create_directories() {
    log "创建目录结构..."

    local target
    for target in "$APP_DIR" "/var/log/myapp" "/opt/myapp/data" "/etc/myapp"; do
        mkdir -p "$target"
        chown "$APP_USER:$APP_USER" "$target"
        chmod 755 "$target"
    done

    # Logs are not world-readable.
    chmod 750 "/var/log/myapp"
}

# Stop the running service and copy the new release from
# /tmp/myapp-new-version into APP_DIR, then fix ownership and permissions.
# When the copy fails the previous version is restored through
# rollback_deployment, which ends the script with status 1.
deploy_new_version() {
    log "部署版本: $DEPLOY_VERSION"

    # Placeholder for fetching the release from a repository; this example
    # expects it to be staged in the directory below.
    local source_dir="/tmp/myapp-new-version"

    if systemctl is-active --quiet "$APP_NAME"; then
        log "停止当前服务..."
        systemctl stop "$APP_NAME"
    fi

    log "部署应用文件..."

    # -r with "<dir>/." copies subdirectories (for example the systemd/
    # directory read by setup_system_service) and dot files. A plain
    # "cp <dir>/*" skips directories and reports that as a failure.
    if ! cp -r "$source_dir/." "$APP_DIR/"; then
        # Print the message directly: error() would exit before the
        # rollback could run.
        echo -e "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - 部署文件失败,开始回滚..."
        rollback_deployment
    fi

    chown -R "$APP_USER:$APP_USER" "$APP_DIR"
    chmod -R 755 "$APP_DIR"
    chmod +x "$APP_DIR/app.py"
}

# Install the unit file shipped with the application, have systemd re-read
# its configuration and enable the service.
setup_system_service() {
    local shipped_unit="${APP_DIR}/systemd/myapp.service"

    log "配置系统服务..."

    cp "$shipped_unit" "$SERVICE_FILE"
    systemctl daemon-reload
    systemctl enable "$APP_NAME"

    log "系统服务配置完成"
}

# Start the service and wait up to 10 x 3 seconds for it to become active.
# Returns 0 once active. When the start command fails or the service never
# becomes active, the previous version is restored through
# rollback_deployment, which ends the script with status 1.
start_application() {
    log "启动应用服务..."

    # Tested in an `if` so that a failing start leads to the rollback
    # instead of an immediate `set -e` exit.
    if systemctl start "$APP_NAME"; then
        local attempts=0
        while [ "$attempts" -lt 10 ]; do
            if systemctl is-active --quiet "$APP_NAME"; then
                log "服务启动成功"
                return 0
            fi
            sleep 3
            # Not ((attempts++)): it evaluates to 0 on the first pass, which
            # yields exit status 1 and trips `set -e`.
            attempts=$((attempts + 1))
        done
    fi

    # Print the message directly: error() would exit before the rollback.
    echo -e "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - 服务启动失败,开始回滚..."
    rollback_deployment
}

# Poll the /health endpoint up to 10 times, 5 seconds apart.
# Returns 0 on the first successful response. When every attempt fails the
# previous version is restored through rollback_deployment, which ends the
# script with status 1.
health_check() {
    log "执行健康检查..."

    local attempts=0
    while [ "$attempts" -lt 10 ]; do
        if curl -f -s http://localhost:8080/health > /dev/null; then
            log "健康检查通过"
            return 0
        fi
        sleep 5
        # Not ((attempts++)): it evaluates to 0 on the first pass, which
        # yields exit status 1 and trips `set -e`.
        attempts=$((attempts + 1))
    done

    # Print the message directly: error() would exit before the rollback.
    echo -e "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - 健康检查失败,开始回滚..."
    rollback_deployment
}

# Restore the most recent backup archive over "/", restart the service and
# end the script with status 1. Aborts through error when no archive exists.
rollback_deployment() {
    log "开始回滚部署..."

    # ls -t lists newest first; the result is empty when nothing matches.
    local newest_archive=$(ls -t "${BACKUP_DIR}/myapp_"*.tar.gz 2>/dev/null | head -1)

    [ -n "$newest_archive" ] || error "找不到备份文件,无法回滚"

    log "恢复备份: $newest_archive"

    # The service may already be stopped; ignore a failing stop.
    systemctl stop "$APP_NAME" 2>/dev/null || true
    tar -xzf "$newest_archive" -C "/"
    systemctl start "$APP_NAME"

    log "回滚完成"
    exit 1
}

# Delete backup archives in BACKUP_DIR that find selects with -mtime +7.
cleanup_old_backups() {
    local keep_days=7

    log "清理旧备份..."
    find "$BACKUP_DIR" -name "myapp_*.tar.gz" -mtime "+${keep_days}" -delete
    log "备份清理完成"
}

# Print a deployment summary: service state, enablement, deployed version,
# current time and the last five journal lines of the service.
show_deployment_status() {
    log "=== 部署状态 ==="
    printf '%s\n' "服务状态: $(systemctl is-active "$APP_NAME")"
    printf '%s\n' "服务启用状态: $(systemctl is-enabled "$APP_NAME")"
    printf '%s\n' "应用版本: $DEPLOY_VERSION"
    printf '%s\n' "部署时间: $(date)"

    log "最近服务日志:"
    journalctl -u "$APP_NAME" -n 5 --no-pager
}

# Run the deployment steps in order. With `set -e` active, the first
# failing step ends the script.
main_deployment() {
    log "开始部署 MyApp..."

    local step
    for step in \
        check_permissions \
        backup_current_version \
        create_app_user \
        create_directories \
        deploy_new_version \
        setup_system_service \
        start_application \
        health_check \
        cleanup_old_backups \
        show_deployment_status
    do
        "$step"
    done

    log "部署完成!"
}

# Print the usage text.
show_usage() {
    printf '%s\n' \
        "MyApp 自动化部署脚本" \
        "" \
        "使用方法: $0 [版本]" \
        "" \
        "示例:" \
        "  $0 v1.2.3    部署指定版本" \
        "  $0          部署最新版本" \
        "" \
        "环境变量:" \
        "  DEPLOY_VERSION  部署版本号"
}

# Entry point, only when executed directly (not when sourced):
# -h/--help prints the usage text, anything else starts the deployment.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    case "$1" in
        -h|--help)
            show_usage
            exit 0
            ;;
    esac

    main_deployment
fi

8. 总结

通过本文的详细介绍,您已经掌握了使用 systemd 管理自定义应用服务的完整知识体系:

  1. 基础服务配置 - 创建标准的 systemd 服务单元文件
  2. 高级服务模式 - 多实例服务、依赖管理、模板服务
  3. 定时器服务 - 自动化任务和健康检查
  4. 路径和套接字激活 - 事件驱动的服务管理
  5. 资源管理和安全 - 完整的资源限制和安全沙盒配置
  6. 监控和维护 - 自动化监控、部署和维护工具

通过 systemd 的强大功能,您可以确保自定义应用的稳定性、安全性和可维护性。

相关推荐
脏脏a1 小时前
【Linux】Linux进程状态深度解析
linux·运维·服务器
凉晓风1 小时前
Linux中常见几种自启动方式的区别
linux·运维·服务器
LCG元1 小时前
考古利器:find 命令的高级用法,按时间、大小、内容精准查找
linux
U***74693 小时前
Linux(CentOS)安装 MySQL
linux·mysql·centos
3***g2053 小时前
Linux系统离线部署MySQL详细教程(带每步骤图文教程)
linux·mysql·adb
Dovis(誓平步青云)3 小时前
《内核视角下的 Linux 锁与普通生产消费模型:同步原语设计与性能优化思路》
linux·运维·性能优化
xu_yule3 小时前
Linux_13(多线程)页表详解+轻量级进程+pthread_create
linux·运维·服务器
江湖有缘5 小时前
Linux系统之htop命令基本使用
linux·运维·服务器
CodeByV5 小时前
【Linux】基础 IO 深度解析:文件、描述符与缓冲区
linux