python3脚本监控java服务,如果服务挂了则主动重启服务

启动脚本

bash
#!/bin/bash
#
# Init-style control script for a Java service shipped as a single jar.
# The service name is derived from this script's own file name, so the script
# must be installed (or symlinked) under the name of the service it controls.
#
# Usage: <script> {start|stop|status|restart}
#
# bash is required: `source` and /etc/bashrc are bash-specific.

# Load the system-wide environment (JAVA_HOME, PATH, ...). Failures are
# silenced on purpose: either file may be missing on a given distribution.
source /etc/bashrc >/dev/null 2>&1
source /etc/profile >/dev/null 2>&1


# Service name == file name of this script.
NAME=$(basename "$0")
BASE="/data/deploy/${NAME}"
LOGBASE="/data/logs/${NAME}"
# Port handed to the Prometheus JMX exporter agent below.
PORT="8888"
#JARPORT="-Dserver.port=${PORT}"
#JMXPORT=$[PORT + 9]
#JMXPORT='999'
#DEBUGPORT=$[PORT + 10000]
LIMIT='-Xms6144m -Xmx6144m -XX:MetaspaceSize=256m  -XX:MaxMetaspaceSize=256m -XX:+UseG1GC'
# JVM encoding flags. Deliberately NOT stored in LANG: LANG is the locale
# environment variable and is inherited by every child process (java, ps,
# grep, awk), so overwriting it would hand them an invalid locale.
ENCODING_OPTS='-Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8'
SAFEMODE='-Dfastjson.parser.safeMode=true'
# Application arguments appended after the jar path.
WALLET='--network.params=mainnet --coin.consensus=SOL'
#PROF='-Dspring.profiles.active=prod'
#DEBUG="-Xdebug -Xrunjdwp:transport=dt_socket,address=${DEBUGPORT},server=y,suspend=n"
#JMX="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=${JMXPORT}"
JMX="-javaagent:/data/jmx/jmx_prometheus_javaagent-0.13.0.jar=${PORT}:/data/jmx/tomcat.yml"
#NACOS="--spring.cloud.nacos.config.server-addr=nacos-config-server"
#if [ "${NACOS}" == "--spring.cloud.nacos.config.server-addr=nacos-config-server" ];then
#       NACOS=""
#fi

# Best effort: a missing deploy directory must not prevent the rest of the
# script from being evaluated, so a failure here is not fatal.
cd "${BASE}"

# Make sure the log directory exists.
if [ ! -d "${LOGBASE}" ]; then
    mkdir -p "${LOGBASE}"
#    chown -R $NAME:$NAME "$LOGBASE"
#else
#    chown -R $NAME:$NAME "$LOGBASE"
fi

SERVER="${NAME}"
PUSER="${NAME}"
GROUP="${NAME}"
PID_FILE="/var/run/${NAME}.pid"
BIN_DIR="${BASE}"
LOG_DIR="${BASE}/${NAME}/logs"
#MUID=`grep -P "^$NAME:" /etc/passwd | awk -F":" '{print $3}'`
#JMXPORT=""$MUID"9"

#if [ ${#JMXPORT} -ne 5 ]
#then
#    echo "JMXPORT FAIL $JMXPORT..."
#    exit 1
#fi

#CMD_BASE="java -Dspring.profiles.active=preprodtest -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=$JMXPORT"
# DEBUG, PARTER and NACOS are not assigned in this script (their definitions
# are commented out above), so they expand to nothing unless they are
# inherited from the environment.
CMD_BASE="java ${LIMIT} ${ENCODING_OPTS} ${SAFEMODE} ${DEBUG} ${JMX}"


# Full command line as one flat string; it is word-split when executed.
JAVA_CMD="$CMD_BASE -jar $BASE/$NAME.jar ${PARTER} ${NACOS} ${WALLET}"


RETVAL=0

# Maximum number of seconds "stop" waits for a graceful exit after SIGTERM
# before escalating to SIGKILL.
STOP_TIMEOUT=30

# Print the PIDs (one per line) of all processes whose `ps -ef` line contains
# the literal string "<SERVER>.jar". Prints nothing when there is no match.
# grep -F is used so that the dot is matched literally instead of as a regex
# wildcard.
service_pids() {
    ps -ef | grep -F -- "${SERVER}.jar" | grep -v grep | awk '{print $2}'
}

case "$1" in
    start)
        # printf instead of `echo -n`, which is not portable across sh flavours.
        printf '%s' "Starting ${SERVER} ..."
        if [ -n "$(service_pids)" ]; then
            echo "${SERVER} is running"
            exit 0
        fi
        echo "$JAVA_CMD"
        cd "${BASE}" || { echo "cannot cd to ${BASE}" >&2; exit 1; }
        # JAVA_CMD is a flat option string; it is left unquoted on purpose so
        # the shell splits it into individual arguments.
        # Output is discarded here (earlier variants redirecting to a log file
        # were disabled by the author).
        # shellcheck disable=SC2086
        nohup ${JAVA_CMD} > /dev/null 2>&1 &
        # Give the JVM a moment to either come up or die before checking.
        sleep 5
        PID=$(service_pids)
        if [ -n "${PID}" ]; then
            echo "start ${SERVER} success! PID : $PID"
            exit 0
        else
            echo "start ${SERVER} failure"
            exit 1
        fi
        ;;
    stop)
        printf '%s' "Shutting down ${SERVER}"
        PID=$(service_pids)
        if [ -z "${PID}" ]; then
            echo "${SERVER} is no running"
            exit 0
        fi
        echo "PID : $PID"
        # PID may hold several PIDs, hence the deliberate word splitting.
        # shellcheck disable=SC2086
        kill ${PID}
        # Poll for a graceful exit instead of sleeping the full timeout
        # unconditionally.
        waited=0
        while [ -n "$(service_pids)" ] && [ "${waited}" -lt "${STOP_TIMEOUT}" ]; do
            sleep 1
            waited=$((waited + 1))
        done
        # Escalate only if something is still alive, and re-query the PIDs so
        # a PID that was freed (and possibly reused) in the meantime is never
        # hit with SIGKILL.
        PID=$(service_pids)
        if [ -n "${PID}" ]; then
            # shellcheck disable=SC2086
            kill -9 ${PID}
            sleep 1
            PID=$(service_pids)
        fi
        # Empty when the service is gone.
        echo "PID : $PID"
        [ -z "${PID}" ] || exit 1
        ;;
    restart)
        echo "restart ${SERVER} ..."
        "$0" stop
        sleep 1
        "$0" start
        ;;
    status)
        # Exit status: 0 when the service is running, 1 when it is down.
        PID=$(service_pids)
        if [ -n "${PID}" ]; then
            echo "${SERVER} is running"
            echo "PID : $PID"
           #echo "${SERVER} PORT: $PORT"
           #echo "${SERVER} JMXPORT: $JMXPORT"
            exit 0
        else
            echo "${SERVER} is down"
            echo "PID : NULL"
            exit 1
        fi
        ;;
    *)
        echo "Usage: $0 {start|stop|status|restart}"
        exit 1
        ;;
esac

监控脚本

python
import subprocess
import time
import logging

def check_and_start_process(script_path, process_name, log_file):
    """Check a service through its control script and start it if it is down.

    The control script is invoked as ``<script_path> status``. Its exit code
    decides the outcome: 0 means the service is running, anything else means
    it is down. The textual output is not parsed, because the status command
    prints a ``PID : ...`` line last and signals "down" with a non-zero exit
    code. When the service is down, ``<script_path> start`` is executed and
    its output is appended to ``log_file``.

    Args:
        script_path: Path of the executable control script.
        process_name: Human-readable service name, used only in log messages.
        log_file: File that receives both the monitor's log records and the
            output of the ``start`` command.

    Returns:
        None. Failures to run the script or to start the service are logged,
        not raised, so a surrounding polling loop keeps running.
    """
    # basicConfig only configures the root logger on its first call, so
    # calling it on every polling cycle is harmless.
    logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
    logger = logging.getLogger()

    try:
        logger.info("Checking process status...")
        # No check=True here: a non-zero exit code is the normal "service is
        # down" answer, not an error.
        status = subprocess.run(
            [script_path, 'status'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if status.returncode == 0:
            logger.info(f"{process_name} is running.")
            return

        logger.info(f"{process_name} is not running. Starting...")
        # The start output goes straight to the log file (not a pipe) so a
        # daemon that inherits the descriptor cannot block this call.
        with open(log_file, 'a') as f:
            subprocess.run(
                [script_path, 'start'],
                stdout=f,
                stderr=subprocess.STDOUT,
                text=True,
                check=True,
            )
        logger.info(f"Started {process_name}.")

    except subprocess.CalledProcessError as e:
        # The start command itself reported a failure.
        logger.error(f"Error occurred: {e}")
    except OSError as e:
        # Script missing / not executable, or the log file cannot be opened.
        logger.error(f"Error occurred: {e}")

if __name__ == "__main__":
    # Control script of the monitored service; replace with your script path.
    script_path = "/etc/init.d/wallet-job-SOL"
    # Name used in log messages; replace with your process name.
    process_name = "wallet-job-SOL"
    # Log file name (relative to the current working directory).
    log_file = "sol_monitor.log"
    # Pause between two checks, in seconds (five minutes).
    poll_interval = 300

    # Poll forever: check, restart if needed, then sleep until the next cycle.
    while True:
        check_and_start_process(script_path, process_name, log_file)
        time.sleep(poll_interval)
相关推荐
数据智能老司机1 小时前
精通 Python 设计模式——分布式系统模式
python·设计模式·架构
数据智能老司机2 小时前
精通 Python 设计模式——并发与异步模式
python·设计模式·编程语言
数据智能老司机2 小时前
精通 Python 设计模式——测试模式
python·设计模式·架构
数据智能老司机2 小时前
精通 Python 设计模式——性能模式
python·设计模式·架构
c8i2 小时前
drf初步梳理
python·django
每日AI新事件2 小时前
python的异步函数
python
这里有鱼汤3 小时前
miniQMT下载历史行情数据太慢怎么办?一招提速10倍!
前端·python
databook13 小时前
Manim实现脉冲闪烁特效
后端·python·动效
程序设计实验室13 小时前
2025年了,在 Django 之外,Python Web 框架还能怎么选?
python
倔强青铜三14 小时前
苦练Python第46天:文件写入与上下文管理器
人工智能·python·面试