Python3 脚本监控 Java 服务，如果服务挂了则主动重启服务

启动脚本

bash 复制代码
#!/bin/bash
#
# Init-style control script for one Java service shipped as a jar.
# The service name is derived from this script's own file name, and the jar
# is launched from /data/deploy/<name>/<name>.jar.
#
# bash is required: `source` below is not part of POSIX sh.

# Load the system-wide shell environment. Output and errors are silenced on
# purpose so a missing file does not pollute the script's output.
source /etc/bashrc >/dev/null 2>&1
source /etc/profile >/dev/null 2>&1

NAME=$(basename "$0")
BASE="/data/deploy/$NAME"
LOGBASE="/data/logs/$NAME"
# Port handed to the javaagent configured in JMX below.
PORT="8888"
#JARPORT="-Dserver.port=${PORT}"
#JMXPORT=$[PORT + 9]
#JMXPORT='999'
#DEBUGPORT=$[PORT + 10000]
LIMIT='-Xms6144m -Xmx6144m -XX:MetaspaceSize=256m  -XX:MaxMetaspaceSize=256m -XX:+UseG1GC'
# Deliberately NOT named LANG: LANG is the locale environment variable, and
# assigning JVM flags to it would hand a bogus locale to every child process
# (including the JVM itself) whenever LANG is exported.
ENCODING='-Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8'
SAFEMODE='-Dfastjson.parser.safeMode=true'
WALLET='--network.params=mainnet --coin.consensus=SOL'
#PROF='-Dspring.profiles.active=prod'
#DEBUG="-Xdebug -Xrunjdwp:transport=dt_socket,address=${DEBUGPORT},server=y,suspend=n"
#JMX="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=${JMXPORT}"
JMX="-javaagent:/data/jmx/jmx_prometheus_javaagent-0.13.0.jar=${PORT}:/data/jmx/tomcat.yml"
#NACOS="--spring.cloud.nacos.config.server-addr=nacos-config-server"
#if [ "${NACOS}" == "--spring.cloud.nacos.config.server-addr=nacos-config-server" ];then
#       NACOS=""
#fi

# Best effort only: status/stop do not need the deploy directory, so a
# failure here is reported but does not abort the script.
cd "${BASE}" || echo "warning: cannot cd to ${BASE}" >&2

# mkdir -p is a no-op when the directory already exists.
mkdir -p "$LOGBASE"
#chown -R $NAME:$NAME "$LOGBASE"

SERVER="${NAME}"
# The next five values are not referenced by any active line in this section.
PUSER="${NAME}"
GROUP="${NAME}"
PID_FILE="/var/run/${NAME}.pid"
BIN_DIR="${BASE}"
LOG_DIR="${BASE}/${NAME}/logs"
#MUID=`grep -P "^$NAME:" /etc/passwd | awk -F":" '{print $3}'`
#JMXPORT=""$MUID"9"

#if [ ${#JMXPORT} -ne 5 ]
#then
#    echo "JMXPORT FAIL $JMXPORT..."
#    exit 1
#fi

# DEBUG, PARTER and NACOS are not assigned in this script (their definitions
# are commented out above); they expand to nothing unless the environment
# supplies them.
#CMD_BASE="java -Dspring.profiles.active=preprodtest -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=$JMXPORT"
CMD_BASE="java ${LIMIT} ${ENCODING} ${SAFEMODE} ${DEBUG} ${JMX}"

# Kept as a single string because it is later expanded unquoted so that the
# shell splits it into individual arguments.
JAVA_CMD="$CMD_BASE -jar $BASE/$NAME.jar ${PARTER} ${NACOS} ${WALLET}"


# Seconds to wait for a graceful shutdown before escalating to SIGKILL.
STOP_TIMEOUT=30

# Print the PIDs (one per line) of processes whose command line contains
# "-jar <BASE>/<SERVER>.jar", i.e. the exact jar argument this script launches.
# Prints nothing and returns non-zero when no such process exists.
find_pids() {
    pgrep -f -- "-jar ${BASE}/${SERVER}\.jar"
}

# Dispatch on the requested action: start | stop | restart | status.
case "$1" in
    start)
        printf '%s' "Starting ${SERVER} ..."
        if [ -n "$(find_pids)" ]
        then
            echo "${SERVER} is running"
            exit 0
        fi
        echo "$JAVA_CMD"
        cd "${BASE}" || { echo "cannot cd to ${BASE}" >&2; exit 1; }
        # Output is discarded. To keep it instead, append to
        # ${LOGBASE}/up.log and rotate that file with logrotate:
        #nohup ${JAVA_CMD} >> ${LOGBASE}/up.log 2>&1 &
        # JAVA_CMD is expanded unquoted on purpose so it splits into arguments.
        # shellcheck disable=SC2086
        nohup ${JAVA_CMD} > /dev/null 2>&1 &
        # Give the JVM a moment to come up (or die) before checking.
        sleep 5
        PID=$(find_pids)
        if [ -n "$PID" ]
        then
            echo "start ${SERVER} success! PID : $PID"
            exit 0
        else
            echo "start ${SERVER} failure"
            exit 1
        fi
        ;;
    stop)
        printf '%s' "Shutting down ${SERVER}"
        PID=$(find_pids)
        if [ -z "$PID" ]
        then
            echo "${SERVER} is no running"
            exit 0
        fi
        echo "PID : $PID"
        # PID may hold several PIDs; unquoted so each becomes its own argument.
        # shellcheck disable=SC2086
        kill $PID
        # Poll instead of sleeping blindly: return as soon as the process is
        # gone, and never signal a PID that has already exited.
        waited=0
        while [ "$waited" -lt "$STOP_TIMEOUT" ] && [ -n "$(find_pids)" ]
        do
            sleep 1
            waited=$((waited + 1))
        done
        PID=$(find_pids)
        if [ -n "$PID" ]
        then
            # Graceful shutdown timed out; force-kill what is still alive.
            # shellcheck disable=SC2086
            kill -9 $PID
            sleep 1
            PID=$(find_pids)
        fi
        echo "PID : $PID"
        # Report failure if something survived even SIGKILL.
        [ -z "$PID" ] || exit 1
        ;;
    restart)
        echo "restart ${SERVER} ..."
        "$0" stop
        sleep 1
        "$0" start
        ;;
    status)
        PID=$(find_pids)
        if [ -n "$PID" ]
        then
            echo "${SERVER} is running"
            echo "PID : $PID"
            #echo "${SERVER} PORT: $PORT"
            #echo "${SERVER} JMXPORT: $JMXPORT"
            exit 0
        else
            echo "${SERVER} is down"
            echo "PID : NULL"
            exit 1
        fi
        ;;
    *)
        echo "Usage: $0 {start|stop|status|restart}"
        exit 1
        ;;
esac

监控脚本

python 复制代码
import subprocess
import time
import logging

def check_and_start_process(script_path, process_name, log_file):
    """Check a service through its init script and start it when it is down.

    Runs ``script_path status`` and treats exit status 0 as "running". Any
    other exit status triggers ``script_path start``; the start command's
    output is appended to ``log_file``.

    Args:
        script_path: Path of the init script; invoked with ``status`` and
            ``start`` as its single argument.
        process_name: Name used in log messages only.
        log_file: File that receives the monitor's log records and the
            output of the start command.

    Returns:
        None. Failures to run the script are logged, never raised, so a
        polling loop calling this function keeps running.
    """
    # basicConfig does nothing once the root logger has a handler, so calling
    # it on every poll is harmless.
    logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
    logger = logging.getLogger()

    # Remember the last observed state per script so "is running" is logged
    # only when the state changes, not on every poll.
    last_running = check_and_start_process.__dict__.setdefault('_last_running', {})

    try:
        logger.info("Checking process status...")
        # No check=True here: a non-zero exit is the normal "service is down"
        # answer, not an error, and must reach the restart branch below.
        status = subprocess.run([script_path, 'status'], stdout=subprocess.PIPE, text=True)

        if status.returncode == 0:
            if not last_running.get(script_path, False):
                logger.info(f"{process_name} is running.")
            last_running[script_path] = True
            return

        last_running[script_path] = False
        logger.info(f"{process_name} is not running. Starting...")
        # Send the start output to the log file rather than a pipe, so the
        # call cannot block on a descriptor inherited by the started service.
        with open(log_file, 'a') as f:
            subprocess.run([script_path, 'start'], stdout=f, stderr=subprocess.STDOUT, text=True, check=True)
        logger.info(f"Started {process_name}.")

    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing or non-executable script.
        logger.error(f"Error occurred: {e}")

if __name__ == "__main__":
    # Positional arguments for check_and_start_process; adjust for your host:
    #   1. path of the init script to drive
    #   2. service name shown in log messages
    #   3. monitor log file name
    monitor_args = (
        "/etc/init.d/wallet-job-SOL",
        "wallet-job-SOL",
        "sol_monitor.log",
    )
    # Poll once every five minutes.
    poll_seconds = 300

    while True:
        check_and_start_process(*monitor_args)
        time.sleep(poll_seconds)
相关推荐
以卿a34 分钟前
C++ 模板初阶
开发语言·c++
s:10338 分钟前
【框架】参考 Spring Security 安全框架设计出,轻量化高可扩展的身份认证与授权架构
java·开发语言
道不尽世间的沧桑2 小时前
第17篇:网络请求与Axios集成
开发语言·前端·javascript
久绊A2 小时前
Python 基本语法的详细解释
开发语言·windows·python
Hylan_J5 小时前
【VSCode】MicroPython环境配置
ide·vscode·python·编辑器
软件黑马王子5 小时前
C#初级教程(4)——流程控制:从基础到实践
开发语言·c#
莫忘初心丶5 小时前
在 Ubuntu 22 上使用 Gunicorn 启动 Flask 应用程序
python·ubuntu·flask·gunicorn
闲猫5 小时前
go orm GORM
开发语言·后端·golang
李白同学7 小时前
【C语言】结构体内存对齐问题
c语言·开发语言
黑子哥呢?8 小时前
安装Bash completion解决tab不能补全问题
开发语言·bash