XXL-JOB 整合 SeaTunnel 实现定时任务

流处理(STREAMING 模式:MySQL-CDC 同步到 JDBC)

shell 复制代码
#!/bin/bash
# Host and port of the SeaTunnel cluster endpoint, and the path of the
# SeaTunnel client launcher resolved relative to SEATUNNEL_HOME.
SEATUNNEL_PORT=5801
SEATUNNEL_HOST=localhost
SEATUNNEL_CMD="${SEATUNNEL_HOME}/bin/seatunnel.sh"

# Cleanup handler run when the script is asked to stop: cancels the submitted
# SeaTunnel job (if any) and then terminates the script.
# Globals:
#   SEATUNNEL_CMD (read) - path of the SeaTunnel client launcher
#   JOB_ID        (read) - id of the submitted job; may still be empty
# Exits with the status of the cancel command, or 0 when there was no job
# to cancel.
exit_func() {
    local rc=0
    # A stop signal can arrive before the job has been submitted; in that
    # case there is nothing to cancel and the client must not be invoked
    # with an empty id.
    if [[ -n "${JOB_ID:-}" ]]; then
        "$SEATUNNEL_CMD" -can "$JOB_ID" || rc=$?
    fi
    exit "$rc"
}

# Run exit_func when the script is interrupted or asked to stop:
# SIGINT (Ctrl+C), SIGTERM (manual termination), SIGHUP and SIGQUIT.
# SIGKILL is intentionally not listed: it cannot be caught, so naming it in
# a trap has no effect.
trap exit_func SIGINT SIGTERM SIGHUP SIGQUIT

# Job definition (env / source / transform / sink sections) held in a
# variable so it can be piped to the SeaTunnel client instead of being
# written to a file. The here-document delimiter is quoted, so the shell
# copies the text verbatim.
# NOTE(review): database credentials are embedded in plain text below --
# confirm this is acceptable outside a demo environment.
config_content=$(
    cat <<'SEATUNNEL_CONF'
env {
    "job.mode"=STREAMING
    "job.name"="SeaTunnel_Job"
    "savemode.execute.location"=CLUSTER
}
source {
    MySQL-CDC {
        "snapshot.split.size"="8096"
        "snapshot.fetch.size"="1024"
        "incremental.parallelism"="1"
        "connect.timeout.ms"="30000"
        "connect.max-retries"="3"
        "connection.pool.size"="20"
        "chunk-key.even-distribution.factor.lower-bound"="0.05"
        "chunk-key.even-distribution.factor.upper-bound"="100.0"
        "sample-sharding.threshold"="1000"
        "inverse-sampling.rate"="1000"
        "startup.mode"=INITIAL
        "exactly_once"="false"
        "stop.mode"=NEVER
        parallelism="1"
        "result_table_name"=Table15381274549824
        catalog {
            factory=Mysql
        }
        database-names=[
            "test_source"
        ]
        table-names=[
            "test_source.user"
        ]
        format=DEFAULT
        password="123456"
        username=root
        base-url="jdbc:mysql://127.0.0.1:3306/test_cdc"
        server-time-zone=UTC
    }
}
transform {
}
sink {
    Jdbc {
        "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST"
        "data_save_mode"="APPEND_DATA"
        "create_index"="true"
        "connection_check_timeout_sec"="30"
        "batch_size"="1000"
        "is_exactly_once"="false"
        "max_commit_attempts"="3"
        "transaction_timeout_sec"="-1"
        "max_retries"="0"
        "auto_commit"="true"
        "support_upsert_by_query_primary_key_exist"="false"
        "multi_table_sink_replica"="1"
        "source_table_name"=Table15381274549824
        "generate_sink_sql"=true
        database="test_jdbc"
        table=user
        driver="com.mysql.cj.jdbc.Driver"
        url="jdbc:mysql://127.0.0.1:3306/test_jdbc"
        password="123456"
        user=root
    }
}
SEATUNNEL_CONF
)

echo "开始执行任务"
# NOTE(review): this prints the complete config, including the database
# passwords it contains, into the job log.
echo "--------    配置信息    --------------"
echo "$config_content"
echo "--------    end    --------------"

# Submit the job asynchronously, feeding the config through stdin. The
# client's stdout and stderr are captured together so the job id can be
# parsed from them.
SUBJOB_OUTPUT=$(printf '%s\n' "$config_content" | "$SEATUNNEL_CMD" --config /dev/stdin --async 2>&1)

# The id is the text between "job id: " and the next comma on a line that
# mentions "job name". Only the first such line is used, so JOB_ID is never
# a multi-line value.
JOB_ID=$(printf '%s\n' "$SUBJOB_OUTPUT" \
    | awk -F'job id: ' '/job name/ && NF > 1 { split($2, parts, ","); print parts[1]; exit }')

# Without an id there is nothing to monitor or cancel: show what the client
# printed and fail right away.
if [[ -z "$JOB_ID" ]]; then
    echo "任务提交失败, 未获取到任务Id, SeaTunnel 输出:" >&2
    printf '%s\n' "$SUBJOB_OUTPUT" >&2
    exit 1
fi

echo "任务Id: $JOB_ID"

# Monitor the job through the cluster's REST endpoint until it leaves the
# RUNNING state.
# Exit status: 0 when the job reports FINISHED, 1 for any other non-RUNNING
# state (including an empty state when the query returned nothing usable).

# Print the value of string field "$1" from the JSON text on stdin, i.e. the
# text between `"<name>":"` and the following `","`.
json_string_field() {
    awk -F"\"$1\":\"" '{print $2}' | awk -F '","' '{print $1}'
}

while true; do
    # Bound the request time: bash runs a trap handler only after the
    # current foreground command returns, so a hanging request would also
    # block the stop handler.
    STATUS_OUTPUT=$(curl -s --connect-timeout 10 --max-time 30 \
        "http://${SEATUNNEL_HOST}:${SEATUNNEL_PORT}/hazelcast/rest/maps/job-info/${JOB_ID}")

    write_count=$(printf '%s\n' "$STATUS_OUTPUT" | json_string_field SinkWriteCount)
    read_count=$(printf '%s\n' "$STATUS_OUTPUT" | json_string_field SourceReceivedCount)
    printf '%s 写入数量 : %s, 读取数量 :%s\n' \
        "$(date "+%Y-%m-%d %H:%M:%S.%3N")" "$write_count" "$read_count"

    TASK_STATE=$(printf '%s\n' "$STATUS_OUTPUT" | json_string_field jobStatus)

    if [[ "$TASK_STATE" == "FINISHED" ]]; then
        echo "任务完成, 状态: $TASK_STATE"
        exit 0
    fi

    if [[ "$TASK_STATE" != "RUNNING" ]]; then
        echo "任务已结束,状态:$TASK_STATE"
        exit 1
    fi

    echo "任务运行中 ... 状态: $TASK_STATE"
    # Sleep in the background and wait on it: `wait` returns as soon as a
    # trapped signal arrives, whereas a foreground `sleep 300` would delay
    # the handler by up to five minutes. The sleep's output is detached so a
    # leftover sleep process does not keep the script's output pipe open.
    sleep 300 >/dev/null 2>&1 &
    wait "$!"
done

批处理(BATCH 模式:FakeSource 输出到 Console)

shell 复制代码
#!/bin/bash

# Path of the SeaTunnel client launcher, resolved relative to SEATUNNEL_HOME.
SEATUNNEL_CMD="${SEATUNNEL_HOME}/bin/seatunnel.sh"

# Cleanup handler run when the script is asked to stop: cancels the submitted
# SeaTunnel job (if any) and then terminates the script.
# Globals:
#   SEATUNNEL_CMD (read) - path of the SeaTunnel client launcher
#   JOB_ID        (read) - id of the submitted job; may still be empty
# Exits with the status of the cancel command, or 0 when there was no job
# to cancel.
exit_func() {
    local rc=0
    # A stop signal can arrive before the job has been submitted; in that
    # case there is nothing to cancel and the client must not be invoked
    # with an empty id.
    if [[ -n "${JOB_ID:-}" ]]; then
        "$SEATUNNEL_CMD" -can "$JOB_ID" || rc=$?
    fi
    exit "$rc"
}

# Run exit_func when the script is interrupted or asked to stop:
# SIGINT (Ctrl+C), SIGTERM (manual termination), SIGHUP and SIGQUIT.
# SIGKILL is intentionally not listed: it cannot be caught, so naming it in
# a trap has no effect.
trap exit_func SIGINT SIGTERM SIGHUP SIGQUIT


# Job definition (env / source / sink sections) held in a variable so it can
# be piped to the SeaTunnel client instead of being written to a file. The
# here-document delimiter is quoted, so the shell copies the text verbatim.
config_content=$(
    cat <<'SEATUNNEL_CONF'
env {
  # You can set SeaTunnel environment configuration here
  parallelism = 2
  job.mode = "BATCH"
  checkpoint.interval = 10000
}

source {
  # This is a example source plugin **only for test and demonstrate the feature source plugin**
  FakeSource {
    parallelism = 2
    result_table_name = "fake"
    row.num = 16
    schema = {
      fields {
        name = "string"
        age = "int"
      }
    }
  }

  # If you would like to get more information about how to configure SeaTunnel and see full list of source plugins,
  # please go to https://seatunnel.apache.org/docs/connector-v2/source
}

sink {
  Console {
  }

  # If you would like to get more information about how to configure SeaTunnel and see full list of sink plugins,
  # please go to https://seatunnel.apache.org/docs/connector-v2/sink
}
SEATUNNEL_CONF
)

echo "开始执行任务"
# Submit the job asynchronously, feeding the config through stdin. The
# client's stdout and stderr are captured together so the job id can be
# parsed from them.
SUBJOB_OUTPUT=$(printf '%s\n' "$config_content" | "$SEATUNNEL_CMD" --config /dev/stdin --async 2>&1)

# The id is the text between "job id: " and the next comma on a line that
# mentions "job name". Only the first such line is used, so JOB_ID is never
# a multi-line value.
JOB_ID=$(printf '%s\n' "$SUBJOB_OUTPUT" \
    | awk -F'job id: ' '/job name/ && NF > 1 { split($2, parts, ","); print parts[1]; exit }')

# Without an id there is nothing to monitor or cancel: show what the client
# printed and fail right away.
if [[ -z "$JOB_ID" ]]; then
    echo "任务提交失败, 未获取到任务Id, SeaTunnel 输出:" >&2
    printf '%s\n' "$SUBJOB_OUTPUT" >&2
    exit 1
fi

echo "任务Id: $JOB_ID"

# Monitor the job by repeatedly asking the SeaTunnel client for its status.
# Exit status: 0 when the job reports FINISHED, 1 for any state other than
# RUNNING or FINISHED.
while :; do
    # Query the current status report for this job.
    STATUS_OUTPUT=$($SEATUNNEL_CMD -j "$JOB_ID" 2>&1)
    # Keep only lines mentioning the job id, then cut out the jobStatus value.
    TASK_STATE=$(
        echo "$STATUS_OUTPUT" \
            | grep "$JOB_ID" \
            | awk -F'"jobStatus":"' '{print $2}' \
            | awk -F '","' '{print $1}'
    )

    case "$TASK_STATE" in
        FINISHED)
            echo "任务完成, 状态: $TASK_STATE"
            exit 0
            ;;
        RUNNING)
            echo "任务运行中 ... 状态: $TASK_STATE"
            # Pause for 5 seconds before the next query.
            sleep 5
            ;;
        *)
            # Any other state (including an empty one) counts as ended.
            echo "任务已结束,状态:$TASK_STATE"
            exit 1
            ;;
    esac
done
相关推荐
数据智能老司机31 分钟前
CockroachDB权威指南——开始使用
数据库·分布式·架构
松果猿1 小时前
空间数据库学习(二)—— PostgreSQL数据库的备份转储和导入恢复
数据库
无名之逆1 小时前
Rust 开发提效神器:lombok-macros 宏库
服务器·开发语言·前端·数据库·后端·python·rust
s9123601011 小时前
rust 同时处理多个异步任务
java·数据库·rust
cg50171 小时前
Spring Boot 的配置文件
java·linux·spring boot
数据智能老司机1 小时前
CockroachDB权威指南——CockroachDB 架构
数据库·分布式·架构
暮云星影1 小时前
三、FFmpeg学习笔记
linux·ffmpeg
hzulwy2 小时前
Redis常用的数据结构及其使用场景
数据库·redis
rainFFrain2 小时前
单例模式与线程安全
linux·运维·服务器·vscode·单例模式
GalaxyPokemon2 小时前
Muduo网络库实现 [九] - EventLoopThread模块
linux·服务器·c++