Implementing Debezium change data capture via Kafka Connect

1. Run Kafka and Kafka Connect

services:
  kafka:
    image: kafka:4.0.0
    container_name: kafka
    ports:
      - "9092:9092"
      - "9093:9093"
    environment:
      KAFKA_CLUSTER_ID: ""
      KAFKA_INITIAL_CONTROLLERS: CONTROLLER
      KAFKA_CFG_NODE_ID: 1
      KAFKA_CFG_PROCESS_ROLES: 'broker,controller'
      KAFKA_CFG_CONTROLLER_QUORUM_VOTERS: 1@kafka:9094
      KAFKA_CFG_LISTENERS: INTERNAL://:9092,EXTERNAL://0.0.0.0:9093,CONTROLLER://:9094
      KAFKA_CFG_ADVERTISED_LISTENERS: INTERNAL://kafka:9092,EXTERNAL://127.0.0.1:9093
      KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE: 'true'
      KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL:SASL_PLAINTEXT,EXTERNAL:SASL_PLAINTEXT,CONTROLLER:SASL_PLAINTEXT
      KAFKA_CFG_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_CFG_INTER_BROKER_LISTENER_NAME: INTERNAL
      KAFKA_OPTS: -Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf
      KAFKA_CFG_SASL_ENABLED_MECHANISMS: PLAIN
      KAFKA_CFG_SASL_MECHANISM_INTER_BROKER_PROTOCOL: PLAIN
      KAFKA_CFG_SASL_MECHANISM_CONTROLLER_PROTOCOL: PLAIN
      KAFKA_INTER_BROKER_USER: ******
      KAFKA_INTER_BROKER_PASSWORD: ******
      KAFKA_CLIENT_USERS: adminClient
      KAFKA_CLIENT_PASSWORDS: ******
      # KRaft mode (Kafka 4.x) requires StandardAuthorizer; kafka.security.auth.SimpleAclAuthorizer no longer exists
      KAFKA_AUTHORIZER_CLASS_NAME: org.apache.kafka.metadata.authorizer.StandardAuthorizer
      KAFKA_SUPER_USERS: User:adminClient
      KAFKA_ALLOW_EVERYONE_IF_NO_ACL_FOUND: "true"
    volumes:
      - /etc/kafka/kafka_server_jaas.conf:/etc/kafka/kafka_server_jaas.conf:ro
      - /root/****/kafka_data:/bitnami/kafka
    networks:
      net:
        ipv4_address: ******(ip)
  kafka-connect:
    image: kafka-connect:3.2
    container_name: kafka-connect
    ports:
      - "8083:8083"
    environment:
      GROUP_ID: kafka_connect_group
      CONFIG_STORAGE_TOPIC: connect_configs
      OFFSET_STORAGE_TOPIC: connect_offsets
      STATUS_STORAGE_TOPIC: connect_statuses
      # Kafka host:port
      BOOTSTRAP_SERVERS: kafka:9092
      # MySQL host:port
      MYSQL_HOST: ******:3306
      CONNECT_SECURITY_PROTOCOL: SASL_PLAINTEXT
      CONNECT_SASL_MECHANISM: PLAIN
      CONNECT_SASL_JAAS_CONFIG: 'org.apache.kafka.common.security.plain.PlainLoginModule required username="adminBroker" password="*******";'
      CONNECT_PRODUCER_SECURITY_PROTOCOL: SASL_PLAINTEXT
      CONNECT_PRODUCER_SASL_MECHANISM: PLAIN
      CONNECT_PRODUCER_SASL_JAAS_CONFIG: 'org.apache.kafka.common.security.plain.PlainLoginModule required username="*******" password="******";'
      CONNECT_CONSUMER_SECURITY_PROTOCOL: SASL_PLAINTEXT
      CONNECT_CONSUMER_SASL_MECHANISM: PLAIN
      CONNECT_CONSUMER_SASL_JAAS_CONFIG: 'org.apache.kafka.common.security.plain.PlainLoginModule required username="*******" password="******";'
      CONFIG_STORAGE_REPLICATION_FACTOR: 1
      OFFSET_STORAGE_REPLICATION_FACTOR: 1
      STATUS_STORAGE_REPLICATION_FACTOR: 1
    depends_on:
      - kafka
    networks:
      net:
        ipv4_address: *******
networks:
  net:
    external: true
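
Once the compose file is in place, bring the stack up and run a quick smoke test: confirm that the Connect REST API answers on port 8083 and that the Debezium MySQL connector plugin is on the worker's plugin path, otherwise the registration in step 3 will fail. A minimal sketch:

docker compose up -d

# Connect answers once the worker has finished starting (may take a minute)
curl -s http://localhost:8083/

# The Debezium MySQL connector class must be listed here before step 3 works
curl -s http://localhost:8083/connector-plugins | grep -i debezium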

2. Add the Kafka server and client JAAS configuration files

/etc/kafka/kafka_server_jaas.conf

KafkaServer {
  org.apache.kafka.common.security.plain.PlainLoginModule required
  username="****"
  password="*****"
  user_adminClient="******"
  user_adminBroker="*****";
};
KafkaClient {
  org.apache.kafka.common.security.plain.PlainLoginModule required
  username="*****"
  password="*****";
};
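
For ad-hoc debugging with the Kafka CLI tools against this SASL/PLAIN setup, a client-side properties file along these lines should work (a sketch; substitute the adminClient credentials configured above):

# client.properties - matches the broker's SASL/PLAIN listener config
security.protocol=SASL_PLAINTEXT
sasl.mechanism=PLAIN
sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \
  username="adminClient" \
  password="******";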

3. Register a connector, which amounts to listening on specific tables (run this directly once the stack is up)

curl -i -X POST -H "Accept:application/json" -H "Content-Type:application/json" ip:8083/connectors/ -d '
{
  "name": "connector",
  "config": {
    "decimal.handling.mode": "string",
    "connector.class": "io.debezium.connector.mysql.MySqlConnector",
    "tasks.max": "1",
    "database.hostname": "mysql8",
    "database.port": "3306",
    "database.user": "root",
    "database.password": "000000",
    "database.server.id": "<unique per connector>",
    "topic.prefix": "cdc_",
    "database.include.list": "inventory",
    "schema.history.internal.kafka.bootstrap.servers": "kafka:9092",
    "schema.history.internal.kafka.topic": "history_inventory",
    "table.include.list": "<comma-separated list of tables to capture>",
    "transforms": "convertTimezone",
    "transforms.convertTimezone.type": "io.debezium.transforms.TimezoneConverter",
    "transforms.convertTimezone.converted.timezone": "Asia/Shanghai",
    "schema.history.internal.producer.security.protocol": "SASL_PLAINTEXT",
    "schema.history.internal.producer.sasl.mechanism": "PLAIN",
    "schema.history.internal.producer.sasl.jaas.config": "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"adminClient\" password=\"000000\";",
    "schema.history.internal.consumer.security.protocol": "SASL_PLAINTEXT",
    "schema.history.internal.consumer.sasl.mechanism": "PLAIN",
    "schema.history.internal.consumer.sasl.jaas.config": "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"adminClient\" password=\"000000\";"
  }
}'
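
After registering, check that the connector and its task report RUNNING, then watch the change topic. Debezium names topics <topic.prefix>.<database>.<table>, so for a hypothetical table inventory.customers that would be cdc_.inventory.customers. A sketch, using the client.properties file from step 2 for the SASL settings:

# Both the connector and its task should report state RUNNING
curl -s http://localhost:8083/connectors/connector/status

# Watch change events via the external listener on 9093
# (topic name assumes a hypothetical table inventory.customers)
kafka-console-consumer.sh --bootstrap-server 127.0.0.1:9093 \
  --topic cdc_.inventory.customers --from-beginning \
  --consumer.config client.properties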

4. Connector operations

1. List all connectors

curl http://localhost:8083/connectors
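
To include each connector's status and metadata in the same call, the Connect REST API also accepts the expand query parameter:

curl "http://localhost:8083/connectors?expand=status&expand=info"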

2. Delete a connector

curl -X DELETE http://localhost:8083/connectors/<connector-name>
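
A few other standard Kafka Connect REST calls are useful for day-to-day operation (shown for the connector registered above):

# Pause / resume without losing offsets
curl -X PUT http://localhost:8083/connectors/connector/pause
curl -X PUT http://localhost:8083/connectors/connector/resume

# Restart after a failure
curl -X POST http://localhost:8083/connectors/connector/restart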

3. Run a full snapshot once at startup

1. Add "snapshot.mode": "when_needed" to the connector config.

2. Force a one-time snapshot with "snapshot.mode": "initial" (if committed offsets for the connector already exist in Kafka, it will not re-run the full snapshot; a sketch of changing this setting on a registered connector follows at the end of this section).

3. If neither of the above works, register the connector under a new name; this requires no clearing of the offsets topic and does not affect other connectors.

4. Alternative (did not work in my own testing): add "signal.data.collection": "inventory.signals" to the connector config (the value is the fully qualified <database>.<table> of the signal table) and trigger the full sync through a signal:
CREATE TABLE inventory.signals (
  id VARCHAR(64) PRIMARY KEY,
  type VARCHAR(64) NOT NULL,
  data VARCHAR(2048)
);

Insert a row to trigger the full snapshot:

INSERT INTO inventory.signals (id, type, data)
VALUES ('1', 'execute-snapshot', '{"data-collections": ["<db>.<table>","<db>.<table>"], "type": "incremental"}');
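
For items 1 and 2 above, snapshot.mode can be changed on an already-registered connector without deleting it. Note that PUT /connectors/<name>/config replaces the entire config map, so resend everything from step 3 with only snapshot.mode changed (body abbreviated here with "..."):

curl -i -X PUT -H "Content-Type:application/json" \
  http://localhost:8083/connectors/connector/config \
  -d '{ "connector.class": "io.debezium.connector.mysql.MySqlConnector", "snapshot.mode": "initial", ... }'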