启动脚本
bash
#!/bin/bash
# Init-style control script for a Java service.
#
# The service name is taken from this script's own file name; the jar is
# expected at /data/deploy/<name>/<name>.jar.
# Usage: <script> {start|stop|status|restart}
#
# The interpreter is bash (not plain sh) because the bash-only `source`
# builtin is used below to load the system-wide shell settings.

# Load system-wide environment (e.g. JAVA_HOME / PATH); failures are ignored
# on purpose because either file may be absent.
source /etc/bashrc >/dev/null 2>&1
source /etc/profile >/dev/null 2>&1

NAME=$(basename -- "$0")
BASE="/data/deploy/${NAME}"
LOGBASE="/data/logs/${NAME}"
PORT="8888"
#JARPORT="-Dserver.port=${PORT}"
#JMXPORT=$((PORT + 9))
#JMXPORT='999'
#DEBUGPORT=$((PORT + 10000))

# JVM heap / metaspace sizing and garbage collector selection.
LIMIT='-Xms6144m -Xmx6144m -XX:MetaspaceSize=256m -XX:MaxMetaspaceSize=256m -XX:+UseG1GC'
# JVM encoding flags. Deliberately NOT stored in LANG: LANG is the locale
# environment variable, and overwriting it would change the locale seen by
# every child process if it is exported (typically by /etc/profile).
ENCODING='-Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8'
SAFEMODE='-Dfastjson.parser.safeMode=true'
# Application arguments appended after the jar path.
WALLET='--network.params=mainnet --coin.consensus=SOL'
#PROF='-Dspring.profiles.active=prod'
#DEBUG="-Xdebug -Xrunjdwp:transport=dt_socket,address=${DEBUGPORT},server=y,suspend=n"
#JMX="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=${JMXPORT}"
# Java agent option: passes ${PORT} and the tomcat.yml config file to the
# jmx_prometheus_javaagent jar.
JMX="-javaagent:/data/jmx/jmx_prometheus_javaagent-0.13.0.jar=${PORT}:/data/jmx/tomcat.yml"
#NACOS="--spring.cloud.nacos.config.server-addr=nacos-config-server"
#if [ "${NACOS}" == "--spring.cloud.nacos.config.server-addr=nacos-config-server" ];then
# NACOS=""
#fi

# A failed cd is not fatal at this point (cd prints its own error), so that
# `status` keeps working even when the deploy directory is missing.
cd "${BASE}"

# Make sure the log directory exists.
if [ ! -d "${LOGBASE}" ]; then
  mkdir -p "${LOGBASE}"
  # chown -R $NAME:$NAME "$LOGBASE"
fi

SERVER="${NAME}"
PUSER="${NAME}"
GROUP="${NAME}"
PID_FILE="/var/run/${NAME}.pid"
BIN_DIR="${BASE}"
LOG_DIR="${BASE}/${NAME}/logs"
#MUID=`grep -P "^$NAME:" /etc/passwd | awk -F":" '{print $3}'`
#JMXPORT=""$MUID"9"
#if [ ${#JMXPORT} -ne 5 ]
#then
# echo "JMXPORT FAIL $JMXPORT..."
# exit 1
#fi
#CMD_BASE="java -Dspring.profiles.active=preprodtest -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=$JMXPORT"

# DEBUG, PARTER and NACOS are not assigned in this script (their definitions
# are commented out above); they expand to nothing unless supplied by the
# environment or by the sourced files.
CMD_BASE="java ${LIMIT} ${ENCODING} ${SAFEMODE} ${DEBUG} ${JMX}"
# Full launch command. It is stored as a single string and is meant to be
# expanded unquoted so the shell splits it into separate arguments.
JAVA_CMD="$CMD_BASE -jar $BASE/$NAME.jar ${PARTER} ${NACOS} ${WALLET}"
RETVAL=0

# Print the PID of every process whose full command line contains
# "${SERVER}.jar", one per line; print nothing when there is none.
# pgrep never reports itself, so no "grep -v grep" filtering is needed.
find_pids() {
  pgrep -f -- "${SERVER}\.jar"
}

# Dispatch on the first argument: start | stop | restart | status.
case "$1" in
  start)
    printf '%s' "Starting ${SERVER} ..."
    # Refuse to launch a second instance.
    if [ -n "$(find_pids)" ]; then
      echo "${SERVER} is running"
      exit 0
    fi
    echo "$JAVA_CMD"
    cd "${BASE}" || {
      echo "start ${SERVER} failure"
      exit 1
    }
    # Alternative that keeps the console output, for use with logrotate:
    #nohup ${JAVA_CMD} >> ${LOGBASE}/up.log 2>&1 &
    # JAVA_CMD is one string; it is left unquoted on purpose so the shell
    # splits it into the individual java arguments.
    # shellcheck disable=SC2086
    nohup ${JAVA_CMD} > /dev/null 2>&1 &
    # Give the JVM a moment, then check that it is still alive.
    sleep 5
    PID=$(find_pids)
    if [ -n "${PID}" ]; then
      echo "start ${SERVER} success! PID : $PID"
      exit 0
    else
      echo "start ${SERVER} failure"
      exit 1
    fi
    ;;
  stop)
    printf '%s' "Shutting down ${SERVER}"
    PID=$(find_pids)
    if [ -z "${PID}" ]; then
      echo "${SERVER} is no running"
      exit 0
    fi
    echo "PID : $PID"
    # PID may hold several PIDs separated by newlines; word splitting is
    # intended so that each one is passed to kill.
    # shellcheck disable=SC2086
    kill ${PID}
    # Wait up to 30 seconds for a graceful shutdown, returning as soon as
    # the process is gone instead of always sleeping the full 30 seconds.
    waited=0
    while [ "${waited}" -lt 30 ] && [ -n "$(find_pids)" ]; do
      sleep 1
      waited=$((waited + 1))
    done
    # Force-kill only what is still alive; the PID list is looked up again
    # so SIGKILL is never sent to a stale (possibly reused) PID.
    PID=$(find_pids)
    if [ -n "${PID}" ]; then
      # shellcheck disable=SC2086
      kill -9 ${PID}
      sleep 1
      PID=$(find_pids)
    fi
    echo "PID : $PID"
    # Report failure if something survived even SIGKILL.
    [ -z "${PID}" ] || exit 1
    ;;
  restart)
    echo "restart ${SERVER} ..."
    "$0" stop
    sleep 1
    "$0" start
    ;;
  status)
    PID=$(find_pids)
    if [ -n "${PID}" ]; then
      echo "${SERVER} is running"
      echo "PID : $PID"
      #echo "${SERVER} PORT: $PORT"
      #echo "${SERVER} JMXPORT: $JMXPORT"
      exit 0
    else
      echo "${SERVER} is down"
      echo "PID : NULL"
      exit 1
    fi
    ;;
  *)
    echo "Usage: $0 {start|stop|status|restart}"
    exit 1
    ;;
esac
监控脚本
python
import subprocess
import time
import logging
def check_and_start_process(script_path, process_name, log_file):
    """Check a service through its control script and start it if it is down.

    The control script is invoked as ``<script_path> status``. An exit code
    of 0 means the service is running; any other exit code means it is not,
    in which case ``<script_path> start`` is run.

    Args:
        script_path: Path of the control script accepting ``status``/``start``.
        process_name: Name used in the log messages.
        log_file: File that receives the monitor's log records and the
            standard output of the ``start`` command.

    Returns:
        None. Failures to run the script are logged, not raised, so a
        polling loop calling this function keeps running.
    """
    # basicConfig only configures the root logger on its first call;
    # repeated calls from a polling loop are no-ops.
    logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
    logger = logging.getLogger()
    try:
        logger.info("Checking process status...")
        # No check=True here: a non-zero exit code from `status` is the
        # normal "service is down" answer, not an error.
        status = subprocess.run([script_path, 'status'], stdout=subprocess.PIPE, text=True)
        if status.returncode == 0:
            logger.info(f"{process_name} is running.")
            return
        logger.info(f"{process_name} is not running. Starting...")
        # Append the start command's output to the same log file.
        with open(log_file, 'a') as f:
            subprocess.run([script_path, 'start'], stdout=f, text=True, check=True)
        logger.info(f"Started {process_name}.")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error occurred: {e}")
    except OSError as e:
        # Script missing or not executable, or the log file cannot be opened.
        logger.error(f"Error occurred: {e}")
if __name__ == "__main__":
    # Control script of the monitored service (replace with your script path).
    script_path = "/etc/init.d/wallet-job-SOL"
    # Name shown in the log messages (replace with your process name).
    process_name = "wallet-job-SOL"
    # Name of the monitor's log file.
    log_file = "sol_monitor.log"
    # Seconds between two checks: five minutes.
    check_interval_seconds = 300

    # Poll forever: check the service, then sleep until the next round.
    while True:
        check_and_start_process(script_path, process_name, log_file)
        time.sleep(check_interval_seconds)