流处理
shell
#!/bin/bash
SEATUNNEL_CMD="$SEATUNNEL_HOME/bin/seatunnel.sh"
SEATUNNEL_HOST=localhost
SEATUNNEL_PORT=5801
# 定义任务停止时执行的清理操作
exit_func() {
# 在这里放入你希望在任务停止时执行的操作,比如释放资源、记录日志等
$SEATUNNEL_CMD -can "$JOB_ID"
exit;
}
# 捕获 SIGINT (Ctrl+C) 和 SIGTERM (手动终止) 信号
trap exit_func SIGINT SIGTERM SIGHUP SIGQUIT SIGKILL
# 将配置内容写入变量
config_content=$(cat <<EOL
env {
"job.mode"=STREAMING
"job.name"="SeaTunnel_Job"
"savemode.execute.location"=CLUSTER
}
source {
MySQL-CDC {
"snapshot.split.size"="8096"
"snapshot.fetch.size"="1024"
"incremental.parallelism"="1"
"connect.timeout.ms"="30000"
"connect.max-retries"="3"
"connection.pool.size"="20"
"chunk-key.even-distribution.factor.lower-bound"="0.05"
"chunk-key.even-distribution.factor.upper-bound"="100.0"
"sample-sharding.threshold"="1000"
"inverse-sampling.rate"="1000"
"startup.mode"=INITIAL
"exactly_once"="false"
"stop.mode"=NEVER
parallelism="1"
"result_table_name"=Table15381274549824
catalog {
factory=Mysql
}
database-names=[
"test_source"
]
table-names=[
"test_source.user"
]
format=DEFAULT
password="123456"
username=root
base-url="jdbc:mysql://127.0.0.1:3306/test_cdc"
server-time-zone=UTC
}
}
transform {
}
sink {
Jdbc {
"schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST"
"data_save_mode"="APPEND_DATA"
"create_index"="true"
"connection_check_timeout_sec"="30"
"batch_size"="1000"
"is_exactly_once"="false"
"max_commit_attempts"="3"
"transaction_timeout_sec"="-1"
"max_retries"="0"
"auto_commit"="true"
"support_upsert_by_query_primary_key_exist"="false"
"multi_table_sink_replica"="1"
"source_table_name"=Table15381274549824
"generate_sink_sql"=true
database="test_jdbc"
table=user
driver="com.mysql.cj.jdbc.Driver"
url="jdbc:mysql://127.0.0.1:3306/test_jdbc"
password="123456"
user=root
}
}
EOL
)
echo "开始执行任务"
echo "-------- 配置信息 --------------"
echo "$config_content"
echo "-------- end --------------"
# 将配置内容写入标准输入并传递给 SeaTunnel
SUBJOB_OUTPUT=$(echo "$config_content" | $SEATUNNEL_CMD --config /dev/stdin --async 2>&1)
JOB_ID=$(echo "$SUBJOB_OUTPUT" | grep "job name" | awk -F'job id: ' '{print $2}' | awk -F',' '{print $1}')
echo "任务Id: $JOB_ID"
# 监控任务状态
while true; do
STATUS_OUTPUT=$(curl -s http://$SEATUNNEL_HOST:$SEATUNNEL_PORT/hazelcast/rest/maps/job-info/$JOB_ID)
echo $(date "+%Y-%m-%d %H:%M:%S.%3N") "写入数量 : "$(echo "$STATUS_OUTPUT" | awk -F'"SinkWriteCount":"' '{print $2}' | awk -F '","' '{print $1}')", 读取数量 :"$(echo "$STATUS_OUTPUT" | awk -F'"SourceReceivedCount":"' '{print $2}' | awk -F '","' '{print $1}')
TASK_STATE=$(echo "$STATUS_OUTPUT" | awk -F'"jobStatus":"' '{print $2}' | awk -F '","' '{print $1}')
if [[ "$TASK_STATE" == "FINISHED" ]]; then
echo "任务完成, 状态: $TASK_STATE"
exit 0
fi
if [[ "$TASK_STATE" != "RUNNING" ]]; then
echo "任务已结束,状态:$TASK_STATE"
exit 1
else
echo "任务运行中 ... 状态: $TASK_STATE"
sleep 300
fi
done
批处理
shell
#!/bin/bash
SEATUNNEL_CMD="$SEATUNNEL_HOME/bin/seatunnel.sh"
# 定义任务停止时执行的清理操作
exit_func() {
# 在这里放入你希望在任务停止时执行的操作,比如释放资源、记录日志等
$SEATUNNEL_CMD -can "$JOB_ID"
exit;
}
# 捕获 SIGINT (Ctrl+C) 和 SIGTERM (手动终止) 信号
trap exit_func SIGINT SIGTERM SIGHUP SIGQUIT SIGKILL
# 将配置内容写入变量
config_content=$(cat <<EOL
env {
# You can set SeaTunnel environment configuration here
parallelism = 2
job.mode = "BATCH"
checkpoint.interval = 10000
}
source {
# This is a example source plugin **only for test and demonstrate the feature source plugin**
FakeSource {
parallelism = 2
result_table_name = "fake"
row.num = 16
schema = {
fields {
name = "string"
age = "int"
}
}
}
# If you would like to get more information about how to configure SeaTunnel and see full list of source plugins,
# please go to https://seatunnel.apache.org/docs/connector-v2/source
}
sink {
Console {
}
# If you would like to get more information about how to configure SeaTunnel and see full list of sink plugins,
# please go to https://seatunnel.apache.org/docs/connector-v2/sink
}
EOL
)
echo "开始执行任务"
# 将配置内容写入标准输入并传递给 SeaTunnel
SUBJOB_OUTPUT=$(echo "$config_content" | $SEATUNNEL_CMD --config /dev/stdin --async 2>&1)
JOB_ID=$(echo "$SUBJOB_OUTPUT" | grep "job name" | awk -F'job id: ' '{print $2}' | awk -F',' '{print $1}')
echo "任务Id: $JOB_ID"
# 监控任务状态
while true; do
# 查询任务状态
STATUS_OUTPUT=$($SEATUNNEL_CMD -j "$JOB_ID" 2>&1)
TASK_STATE=$(echo "$STATUS_OUTPUT" | grep "$JOB_ID" | awk -F'"jobStatus":"' '{print $2}' | awk -F '","' '{print $1}')
if [[ "$TASK_STATE" == "FINISHED" ]]; then
echo "任务完成, 状态: $TASK_STATE"
exit 0
fi
# 检查任务是否已完成
if [[ "$TASK_STATE" != "RUNNING" ]]; then
echo "任务已结束,状态:$TASK_STATE"
exit 1
else
echo "任务运行中 ... 状态: $TASK_STATE"
# 等待 5 秒后再次查询
sleep 5
fi
done