单节点大数据平台运维脚本

以下是单节点大数据集群的运维脚本,支持 start、status、stop 三种操作,用于统一启停各组件并巡检其运行状态。

bash 复制代码
vi /opt/bash/bigdata-operate-script.sh
bash 复制代码
#!/bin/bash
# Operations script for a single-node big data platform.
# Usage: bigdata-operate-script.sh {start|status|stop}

# Load user and system environment (PATH entries for hadoop/kafka CLIs, etc.).
source ~/.bashrc
source /etc/profile

# Host that every service endpoint below is built from.
hostname=bigdata

# Components required for the jobs to run.
important_components=("kafka" "clickhouse-server" "elasticsearch" "kibana")
# Optional monitoring components.
monitor_components=("prometheus" "node_exporter" "pushgateway" "kafka_exporter" "grafana")
# Count of healthy core services (set by check_status); gates monitor.sh launch.
m_flag=0
# Expected number of running applications on YARN.
job_num_count=12
# Kafka cluster bootstrap address.
KAFKA_BROKER="$hostname:9092"

# Elasticsearch HTTP API.
ES_API="http://$hostname:9200"
# Kibana status API.
KIBANA_API="http://$hostname:5601/api/status"
# Basic-auth credentials if needed.
# NOTE(review): these are placeholders ("ES username"/"ES password") — fill in
# real values, ideally read from a protected file rather than hard-coded here.
ES_USER="ES用户名"
ES_PASS="ES密码"

function start_component() {
    # Start the given systemd unit, but only when it is not already active.
    local svc="$1"
    if systemctl is-active --quiet "$svc"; then
        return 0
    fi
    echo "start $svc"
    sudo systemctl start "$svc"
}

function check_status() {
    # Report the health of every platform service.
    # Side effect: increments the global m_flag once per healthy core service
    # (HDFS, YARN, Kafka, ClickHouse, Elasticsearch, Kibana), so a fully
    # healthy platform leaves m_flag == 6.
    local jps=/usr/java/jdk1.8.0_212/bin/jps

    # HDFS: both daemons present and the filesystem answering; safe mode
    # counts as "not ready" and is reported separately (no m_flag increment).
    if "$jps" | grep -q NameNode && "$jps" | grep -q DataNode && hadoop fs -ls / >/dev/null 2>&1; then
        if hdfs dfsadmin -safemode get | grep -q ON; then
            echo "HDFS in safe mode"
        else
            echo "========HDFS is Running========"
            ((m_flag++))
        fi
    else
        echo "--------HDFS is Not Running--------"
    fi

    # YARN: ResourceManager and NodeManager present and the RM answering.
    if "$jps" | grep -q ResourceManager && "$jps" | grep -q NodeManager && yarn node -list >/dev/null 2>&1; then
        echo "========YARN is Running========"
        ((m_flag++))
    else
        echo "--------YARN is Not Running--------"
    fi

    # Kafka: broker answers a topic listing.
    if kafka-topics.sh --bootstrap-server "$KAFKA_BROKER" --list >/dev/null 2>&1; then
        echo "========Kafka cluster is Running========"
        ((m_flag++))
    else
        echo "--------Kafka cluster is Not Running--------"
    fi

    # ClickHouse: unit active and the server accepting queries.
    # NOTE(review): the password is a hard-coded placeholder and is visible in
    # `ps` output — replace with a real credential read from a protected file.
    if systemctl is-active --quiet clickhouse-server && clickhouse-client --port 9123 --user clickhouse --password clickhouse密码 --query "SELECT 1" >/dev/null 2>&1; then
        echo "========ClickHouse-server service is Running========"
        ((m_flag++))
    else
        echo "--------ClickHouse-server service is Not Running--------"
    fi

    # Elasticsearch: unit active and the HTTP API reachable.
    if systemctl is-active --quiet elasticsearch && curl -s -u "$ES_USER:$ES_PASS" "$ES_API" >/dev/null 2>&1; then
        echo "========Elasticsearch service is Running========"
        ((m_flag++))
    else
        echo "--------Elasticsearch service is Not Running--------"
    fi

    # Kibana: unit active and its status API reporting overall state "green".
    local kibana_status
    kibana_status=$(curl -s -u "$ES_USER:$ES_PASS" "$KIBANA_API" | /usr/local/bin/jq -r '.status.overall.state')
    if systemctl is-active --quiet kibana && [ "$kibana_status" == "green" ]; then
        echo "========Kibana service is Running========"
        ((m_flag++))
    else
        # Fixed typo in the message: was "Kiaba".
        echo "--------Kibana service is Not Running--------"
    fi

    # Monitoring components: report only, no m_flag contribution.
    for i in "${monitor_components[@]}"; do
        if systemctl is-active --quiet "$i"; then
            echo "========$i service is Running========"
        else
            echo "--------$i service is Not Running--------"
        fi
    done
}

# Entry point: dispatch on the first command-line argument.
case $1 in
start)

    # Start HDFS only when the NameNode is absent and the FS is unreachable.
    if ! /usr/java/jdk1.8.0_212/bin/jps | grep -q NameNode && ! hdfs dfsadmin -report >/dev/null 2>&1 && ! hadoop fs -ls / >/dev/null 2>&1; then
        echo "start HDFS"
        /bin/bash /opt/module/hadoop-3.3.5/sbin/start-dfs.sh
    fi

    # Start YARN only when both ResourceManager and NodeManager are absent.
    if ! /usr/java/jdk1.8.0_212/bin/jps | grep -q ResourceManager && ! /usr/java/jdk1.8.0_212/bin/jps | grep -q NodeManager; then
        echo "start Yarn"
        /bin/bash /opt/module/hadoop-3.3.5/sbin/start-yarn.sh
    fi

    # Core components first, then wait for them to come up.
    for i in "${important_components[@]}"; do
        start_component "$i"
    done

    sleep 30

    # Monitoring components after the core services have had time to start.
    for i in "${monitor_components[@]}"; do
        start_component "$i"
    done

    sleep 20

    # Check component status (this also accumulates m_flag).
    check_status

    # Launch the jobs only when all 6 core services are healthy and YARN is
    # not already running the expected number of applications.
    if [ "$m_flag" -eq 6 ] && ! [ "$(curl -s http://$hostname:8088/ws/v1/cluster/metrics | /usr/local/bin/jq .clusterMetrics.appsRunning)" -eq $job_num_count ]; then
        /bin/bash /home/bigdata/job/dw3/monitor.sh
    fi
    ;;

status)
    check_status
    ;;

stop)
    if /usr/java/jdk1.8.0_212/bin/jps | grep -q ResourceManager && /usr/java/jdk1.8.0_212/bin/jps | grep -q NodeManager; then
        # Kill all running YARN applications first.
        /bin/bash /home/bigdata/killAllApp.sh
        # Only shut components down once YARN reports zero running apps.
        if [ "$(curl -s http://$hostname:8088/ws/v1/cluster/metrics | /usr/local/bin/jq .clusterMetrics.appsRunning)" -eq "0" ]; then
            # Stop all core components.
            /bin/bash /opt/module/hadoop-3.3.5/sbin/stop-yarn.sh
            /bin/bash /opt/module/hadoop-3.3.5/sbin/stop-dfs.sh
            for i in "${important_components[@]}"; do
                sudo systemctl stop "$i"
            done

        else
            # Applications still running on YARN — operator must stop them manually.
            echo "Yarn上程序未停完,需手动停止"
        fi
    fi

    # Monitoring components are stopped unconditionally.
    for i in "${monitor_components[@]}"; do
        if systemctl is-active --quiet $i; then
            echo "========stop $i service========"
            sudo systemctl stop "$i"
        fi
    done
    check_status
    ;;
*)
    # Usage message (Chinese): start / status / stop all component clusters.
    echo "请输入合法的参数"
    echo "  start   启动所有组件集群"
    echo "  status  查看所有组件集群状态"
    echo "  stop    停止所有组件集群"
    ;;
esac

kill 掉 YARN 上所有名称中带有 ETL 的任务的脚本:

bash 复制代码
vi /home/bigdata/killAllApp.sh
bash 复制代码
#!/bin/bash
# Kill every YARN application whose listing line contains "ETL".
# Changes from the original:
#   - dropped the useless `grep -v grep` (that idiom is for `ps` pipelines;
#     nothing in `yarn application -list` output comes from grep),
#   - `xargs -r` skips the kill entirely when nothing matches (otherwise
#     `yarn application -kill` would run with no application ID),
#   - `-n1` kills one application per invocation, which the YARN CLI requires.
yarn application -list | grep -- 'ETL' | awk '{print $1}' | xargs -r -n1 yarn application -kill
echo -e "\033[1;33m============Yarn Application List============\033[0m"
yarn application -list
相关推荐
涛思数据(TDengine)1 小时前
通过最严时序标准,再登产业图谱榜首,TDengine 时序数据库在可信数据库大会荣获双荣誉
大数据·数据库·时序数据库·tdengine·涛思数据
All In丶1 小时前
Tdengine 时序库年月日小时分组汇总问题
大数据·时序数据库·tdengine
涛思数据(TDengine)1 小时前
新客户 | TDengine 时序数据库是怎么在钢厂“撬动”PI 的?
大数据·运维·数据库·时序数据库·tdengine
Giser探索家2 小时前
什么是2米分辨率卫星影像数据?
大数据·人工智能·数码相机·算法·分类·云计算
芯希望2 小时前
芯伯乐XBL6019 60V/5A DC-DC升压芯片的优质选择
大数据·人工智能·物联网·dc-dc·电子元器件·电源管理ic·xblw芯伯乐
菜鸟也会Fly2 小时前
【/usr/bin/env: “bash\r”: 没有那个文件或目录】问题解决
linux·bash
程序员黄老师3 小时前
Linux tail 命令详解
linux·运维
我怎么又饿了呀4 小时前
Windows&Linux系统 安装 CUDA 和 cuDNN
linux·运维·windows
xybDIY5 小时前
智能云探索:基于Amazon Bedrock与MCP Server的AWS资源AI运维实践
运维·人工智能·aws