SeaTunnel Docker 镜像制作
- 
#下载 seatunnel
- export version="2.3.3"
 - wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
 - tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
 
#解压
- tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
 
#配置connector, config/plugin_config 根据需要配置,Demo只需要以下两个即可
- --seatunnel-connectors--
 - connector-fake
 - connector-console
 - --end--
 
#执行插件安装
- sh bin/install-plugin.sh 2.3.3
 - #执行后,会自动下载maven包到 ~/.m2/wrapper/dists/apache-maven-3.8.4-bin/目录中
 - #默认连接到 Maven Central Repository,下载通常比较慢
 - #需要配置一下镜像,可以直接使用本地的 settings.xml
- #Maven 配置目录:~/.m2/wrapper/dists/apache-maven-3.8.4-bin/52ccbt68d252mdldqsfsn03jlf/apache-maven-3.8.4/conf
 
 - #再重新执行 sh bin/install-plugin.sh 2.3.3 使用国内镜像,例如:阿里云,此时就很快了
 - #bin/install-plugin.sh 会将对应的jar包复制到 connectors/seatunnel 和 lib 目录下
 
#修改apache-seatunnel-${version}-bin目录下的配置文件
- #编写plugin_config文件
- vi config/plugin_config
- --seatunnel-connectors--
 - connector-fake
 - connector-console
 - --end--
 
 
 - vi config/plugin_config
 - #编写批处理配置文件
- vi config/v2.batch.config.template
 
 
env {
execution.parallelism = 1
job.mode = "BATCH"
}
source {
FakeSource {
result_table_name = "fake"
row.num = 16
schema = {
fields {
name = "string"
age = "int"
}
}
}
}
transform {
FieldMapper {
source_table_name = "fake"
result_table_name = "fake1"
field_mapper = {
age = age
name = new_name
}
}
}
sink {
Console {
source_table_name = "fake1"
}
}
#下载openjdk:8镜像
- docker pull openjdk:8
 
#创建Dockerfile
- vi dockerfile-seatunnel-2.3.3
 - #内容 当前目录下的seatunnel包、lib目录、connectors目录复制到镜像中
- FROM openjdk:8
 - ENV SEATUNNEL_VERSION="2.3.3"
 - COPY ./apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz /opt/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
 - WORKDIR /opt
 - RUN tar -xzvf apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
 - RUN mv apache-seatunnel-${SEATUNNEL_VERSION} seatunnel
 - RUN rm -f /opt/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
 - WORKDIR /opt/seatunnel
 - ENTRYPOINT ["sh","-c"," bin/seatunnel.sh --config $config -e local"]
 
 
#build镜像
- docker build -t seatunnel:2.3.3 -f dockerfile-seatunnel-2.3.3 .
 
#使用镜像后台运行
- docker run -d -p 9000:9000 --restart=unless-stopped --name seatunnel -d --hostname seatunnel-node1 --network my-net -e config="/data/seatunnel.batch.conf" -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/config/v2.batch.config.template:/data/seatunnel.batch.conf -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/config/plugin_config:/opt/seatunnel/plugin_config -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/lib:/opt/seatunnel/lib -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/plugins:/opt/seatunnel/plugins -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/connectors/seatunnel:/opt/seatunnel/connectors/seatunnel -v /etc/localtime:/etc/localtime seatunnel:2.3.3
 
#使用镜像临时测试
- docker run --name seatunnel --hostname seatunnel-node1 --network my-net -e config="/data/seatunnel.batch.conf" -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/config/v2.batch.config.template:/data/seatunnel.batch.conf -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/config/plugin_config:/opt/seatunnel/plugin_config -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/lib:/opt/seatunnel/lib -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/plugins:/opt/seatunnel/plugins -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/connectors/seatunnel:/opt/seatunnel/connectors/seatunnel -v /etc/localtime:/etc/localtime seatunnel:2.3.3
 
#使用JDBC模式,同步两个库的数据
- 修改plugin_config
 
 
- 
- 
- 
- --seatunnel-connectors--
 - connector-fake
 - connector-console
 - connector-jdbc
 - --end--
 
 - 安装插件,安装过程中会自动把相应的jar包复制到对应的目录中
- sh bin/install-plugin.sh 2.3.3
 
 - 修改v2.streaming.conf.template配置
 
# Defining the runtime environment
env {
# You can set flink configuration here
execution.parallelism = 1
job.mode = "BATCH"
}
source{
Jdbc {
url = "jdbc:mysql://mysql:3306/test"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "root"
password = "123456"
query = "select * from help_keyword_1 limit 2"
}
}
transform {
# If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
# please go to https://seatunnel.apache.org/docs/transform-v2/sql
}
sink {
Console {}
jdbc {
url = "jdbc:mysql://mysql:3306/test2"
driver = "com.mysql.cj.jdbc.Driver"
user = "root"
password = "123456"
query = "insert into help_keyword_1(help_keyword_1_id,name) values(?,?)"
}
}
 
 - 
 
#配置MYSQL-CDC通过binlog实时数据同步
- 
- 
MYSQL开启binlog
 - 
开始CDC同步
 - 
修改配置文件,与batch不同,是conf不是config
 - 
vi v2.streaming.conf.template
 - 
内容如下
env {
# You can set SeaTunnel environment configuration here
execution.parallelism = 1
job.mode = "STREAMING"
# 10秒检查一次,可以适当加大这个值
checkpoint.interval = 10000
#execution.checkpoint.interval = 10000
#execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint"
}
# 配置数据源
source {
  MySQL-CDC {
    # 数据库账号
    username = "root"
    password = "123456"
    # 源表,格式:数据库名.表名
    table-names = ["test.help_keyword_1"]
    base-url = "jdbc:mysql://mysql:3306/test"
  }
}
# 配置目标库
sink {
  jdbc {
    url = "jdbc:mysql://mysql:3306/test2"
    driver = "com.mysql.cj.jdbc.Driver"
    user = "root"
    password = "123456"
    generate_sink_sql = true
    # 目标数据库名
    database = "test2"
    # 目标表名
    table = "help_keyword_1"
    # 主键名称
    primary_keys = ["help_keyword_1_id"]
  }
}
 - 
 - 
- 临时启动容器
- docker run --name seatunnel --hostname seatunnel-node1 --network my-net -e config="/data/seatunnel.streaming.conf" -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/config/v2.streaming.conf.template:/data/seatunnel.streaming.conf -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/config/plugin_config:/opt/seatunnel/plugin_config -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/lib:/opt/seatunnel/lib -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/plugins:/opt/seatunnel/plugins -v /mnt/sda1/seatunnel/apache-seatunnel-2.3.3/connectors/seatunnel:/opt/seatunnel/connectors/seatunnel -v /etc/localtime:/etc/localtime seatunnel:2.3.3
 
 
 - 临时启动容器