实现伪从技术:基于Binlog的Following表变更监听与缓存更新
技术方案概述
要实现一个专门消费者服务作为Following表的伪从,订阅binlog并在数据变更时更新缓存,可以采用以下技术方案:
主要组件
- MySQL Binlog监听:使用开源工具监听MySQL的binlog
- 消息队列:将变更事件发布到消息队列(可选)
- 消费者服务:处理变更事件并更新缓存
- 缓存系统:Redis或其他缓存解决方案
具体实现步骤
1. 配置MySQL Binlog
首先确保MySQL已开启binlog并配置为ROW模式:
            
            
              sql
              
              
            
          
          -- 检查当前binlog配置
SHOW VARIABLES LIKE 'log_bin';
SHOW VARIABLES LIKE 'binlog_format';
-- 修改my.cnf/my.ini文件
[mysqld]
log-bin=mysql-bin
binlog-format=ROW
server-id=1
2. 使用Java实现Binlog监听
可以使用开源的mysql-binlog-connector-java库:
            
            
              xml
              
              
            
          
          <!-- pom.xml dependency. The 0.25.x line of this library is published by the maintained
     fork under the com.zendesk groupId; its Java packages are still
     com.github.shyiko.mysql.binlog, so the imports in the code do not change. -->
<dependency>
    <groupId>com.zendesk</groupId>
    <artifactId>mysql-binlog-connector-java</artifactId>
    <version>0.25.4</version>
</dependency>
3. 消费者服务实现
            
            
              java
              
              
            
          
          import com.github.shyiko.mysql.binlog.BinaryLogClient;
import com.github.shyiko.mysql.binlog.event.*;
import java.io.IOException;
import java.io.Serializable;
import java.util.Map;
/**
 * Pseudo-replica for the {@code Following} table: connects to MySQL as a
 * replication client, listens to ROW-format binlog events and mirrors inserts,
 * key-changing updates and deletes of that table into a {@link CacheService}.
 *
 * <p>Row events carry only a numeric table id, not a table name. The consumer
 * therefore remembers the id announced by the TABLE_MAP event of the watched
 * table and ignores row events that belong to any other table.
 */
public class FollowingTableBinlogConsumer {

    /** Schema that owns the watched table (placeholder, adjust per deployment). */
    private static final String DATABASE_NAME = "your_database";
    /** Name of the watched table. */
    private static final String TABLE_NAME = "Following";
    /** Column positions; assumes follower_id is column 0 and followee_id is column 1. */
    private static final int FOLLOWER_ID_COLUMN = 0;
    private static final int FOLLOWEE_ID_COLUMN = 1;
    /** Marker used until a TABLE_MAP event for the watched table has been seen. */
    private static final long UNKNOWN_TABLE_ID = -1L;

    private final BinaryLogClient client;
    private final CacheService cacheService;

    /** Table id currently mapped to the watched table; only touched by the listener. */
    private long followingTableId = UNKNOWN_TABLE_ID;

    /**
     * Creates the consumer and registers its event listener. No connection is
     * opened until {@link #start()} is called.
     *
     * @param hostname     MySQL host
     * @param port         MySQL port
     * @param username     account used for the replication connection
     * @param password     password of that account
     * @param cacheService cache that receives the follow/unfollow changes
     */
    public FollowingTableBinlogConsumer(String hostname, int port, String username, String password,
                                      CacheService cacheService) {
        this.cacheService = cacheService;
        this.client = new BinaryLogClient(hostname, port, username, password);
        client.registerEventListener(this::handleEvent);
    }

    /** Dispatches one binlog event; row events of other tables are dropped. */
    private void handleEvent(Event event) {
        EventData data = event.getData();

        if (data instanceof TableMapEventData) {
            trackTableId((TableMapEventData) data);
        } else if (data instanceof WriteRowsEventData) {
            WriteRowsEventData write = (WriteRowsEventData) data;
            if (isFollowingTable(write.getTableId())) {
                processWriteEvent(write);
            }
        } else if (data instanceof UpdateRowsEventData) {
            UpdateRowsEventData update = (UpdateRowsEventData) data;
            if (isFollowingTable(update.getTableId())) {
                processUpdateEvent(update);
            }
        } else if (data instanceof DeleteRowsEventData) {
            DeleteRowsEventData delete = (DeleteRowsEventData) data;
            if (isFollowingTable(delete.getTableId())) {
                processDeleteEvent(delete);
            }
        }
    }

    /** Remembers (or forgets) which table id currently denotes the watched table. */
    private void trackTableId(TableMapEventData tableMap) {
        boolean watched = DATABASE_NAME.equals(tableMap.getDatabase())
                && TABLE_NAME.equals(tableMap.getTable());
        if (watched) {
            followingTableId = tableMap.getTableId();
        } else if (tableMap.getTableId() == followingTableId) {
            // The remembered id now maps to a different table; stop trusting it.
            followingTableId = UNKNOWN_TABLE_ID;
        }
    }

    private boolean isFollowingTable(long tableId) {
        return followingTableId != UNKNOWN_TABLE_ID && tableId == followingTableId;
    }

    /**
     * Converts a binlog column value to {@code Long}. Integer column types are
     * delivered as different {@link Number} subclasses, so a direct
     * {@code (Long)} cast is not safe. {@code null} is passed through unchanged.
     */
    private static Long asLong(Serializable value) {
        return value == null ? null : Long.valueOf(((Number) value).longValue());
    }

    /** Handles inserted rows: each one is a new follow relation. */
    private void processWriteEvent(WriteRowsEventData data) {
        for (Serializable[] row : data.getRows()) {
            cacheService.addFollowing(asLong(row[FOLLOWER_ID_COLUMN]), asLong(row[FOLLOWEE_ID_COLUMN]));
        }
    }

    /**
     * Handles updated rows. If the (follower, followee) pair itself changed, the
     * old relation is removed and the new one added; updates that leave the pair
     * untouched do not affect the cache. Extend here for business-specific columns.
     */
    private void processUpdateEvent(UpdateRowsEventData data) {
        for (Map.Entry<Serializable[], Serializable[]> row : data.getRows()) {
            Serializable[] before = row.getKey();
            Serializable[] after = row.getValue();

            Long oldFollower = asLong(before[FOLLOWER_ID_COLUMN]);
            Long oldFollowee = asLong(before[FOLLOWEE_ID_COLUMN]);
            Long newFollower = asLong(after[FOLLOWER_ID_COLUMN]);
            Long newFollowee = asLong(after[FOLLOWEE_ID_COLUMN]);

            boolean pairChanged = !java.util.Objects.equals(oldFollower, newFollower)
                    || !java.util.Objects.equals(oldFollowee, newFollowee);
            if (pairChanged) {
                cacheService.removeFollowing(oldFollower, oldFollowee);
                cacheService.addFollowing(newFollower, newFollowee);
            }
        }
    }

    /** Handles deleted rows: each one is an unfollow. */
    private void processDeleteEvent(DeleteRowsEventData data) {
        for (Serializable[] row : data.getRows()) {
            cacheService.removeFollowing(asLong(row[FOLLOWER_ID_COLUMN]), asLong(row[FOLLOWEE_ID_COLUMN]));
        }
    }

    /**
     * Connects to MySQL and starts receiving events.
     *
     * @throws RuntimeException if the connection cannot be established
     */
    public void start() {
        try {
            client.connect();
        } catch (IOException e) {
            throw new RuntimeException("Failed to connect to MySQL binlog", e);
        }
    }

    /** Disconnects from MySQL. A failure is reported on stderr but not rethrown. */
    public void stop() {
        try {
            client.disconnect();
        } catch (IOException e) {
            // Best effort during shutdown: report instead of silently dropping the error.
            System.err.println("Failed to disconnect from MySQL binlog: " + e);
        }
    }
}
4. 缓存服务实现
            
            
              java
              
              
            
          
          /**
 * Cache-side view of the follow graph. The Redis implementation in this
 * document keeps two sets per user: the users they follow and their followers.
 */
          public interface CacheService {
    /** Records that {@code followerId} follows {@code followeeId}. */
    void addFollowing(Long followerId, Long followeeId);
    /** Removes the relation "{@code followerId} follows {@code followeeId}". */
    void removeFollowing(Long followerId, Long followeeId);
    /** Returns the ids of the users that {@code followerId} follows. */
    Set<Long> getFollowings(Long followerId);
    /** Returns the ids of the users that follow {@code followeeId}. */
    Set<Long> getFollowers(Long followeeId);
}
/**
 * {@link CacheService} backed by Redis sets. Every user owns two sets:
 * {@code user:<id>:followings} and {@code user:<id>:followers}. Each call
 * borrows a connection from the pool and returns it when done.
 */
public class RedisCacheService implements CacheService {

    private final JedisPool jedisPool;

    public RedisCacheService(JedisPool jedisPool) {
        this.jedisPool = jedisPool;
    }

    /** Key of the set holding the users that {@code userId} follows. */
    private static String followingsKey(Long userId) {
        return "user:" + userId + ":followings";
    }

    /** Key of the set holding the followers of {@code userId}. */
    private static String followersKey(Long userId) {
        return "user:" + userId + ":followers";
    }

    /** Reads a Redis set and converts its members to {@code Long}. */
    private Set<Long> readIds(String key) {
        try (Jedis connection = jedisPool.getResource()) {
            Set<String> members = connection.smembers(key);
            return members.stream().map(Long::valueOf).collect(Collectors.toSet());
        }
    }

    @Override
    public void addFollowing(Long followerId, Long followeeId) {
        try (Jedis connection = jedisPool.getResource()) {
            // follower's "followings" first, then followee's "followers"
            connection.sadd(followingsKey(followerId), followeeId.toString());
            connection.sadd(followersKey(followeeId), followerId.toString());
        }
    }

    @Override
    public void removeFollowing(Long followerId, Long followeeId) {
        try (Jedis connection = jedisPool.getResource()) {
            // same order as addFollowing: followings set, then followers set
            connection.srem(followingsKey(followerId), followeeId.toString());
            connection.srem(followersKey(followeeId), followerId.toString());
        }
    }

    @Override
    public Set<Long> getFollowings(Long followerId) {
        return readIds(followingsKey(followerId));
    }

    @Override
    public Set<Long> getFollowers(Long followeeId) {
        return readIds(followersKey(followeeId));
    }
}
5. 服务启动
            
            
              java
              
              
            
          
          /**
 * Wires the Redis-backed cache to the binlog consumer and runs it.
 */
public class Application {
    public static void main(String[] args) {
        // Redis connection pool
        JedisPool jedisPool = new JedisPool("localhost", 6379);
        CacheService cacheService = new RedisCacheService(jedisPool);

        // Binlog consumer
        FollowingTableBinlogConsumer consumer = new FollowingTableBinlogConsumer(
            "localhost", 3306, "username", "password", cacheService);

        // The hook must be registered BEFORE start(): BinaryLogClient.connect()
        // blocks the calling thread until the client is disconnected, so code
        // placed after start() does not run while the consumer is alive.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            try {
                consumer.stop();
            } finally {
                // Release the pool even if stopping the consumer fails.
                jedisPool.close();
            }
        }));

        consumer.start();
    }
}
高级优化方案
1. 引入消息队列(如Kafka)
            
            
              java
              
              
            
          
          // 在Binlog消费者中,将事件发布到Kafka
/**
 * Publishes follow/unfollow events to the {@code following-events} Kafka topic
 * as small JSON strings, keyed by {@code followerId:followeeId}.
 */
public class KafkaEventPublisher {
    private final Producer<String, String> producer;

    /**
     * @param bootstrapServers value for the producer's {@code bootstrap.servers} setting
     */
    public KafkaEventPublisher(String bootstrapServers) {
        // Keys and values are both plain strings.
        final String stringSerializer = "org.apache.kafka.common.serialization.StringSerializer";

        Properties config = new Properties();
        config.put("bootstrap.servers", bootstrapServers);
        config.put("key.serializer", stringSerializer);
        config.put("value.serializer", stringSerializer);

        this.producer = new KafkaProducer<>(config);
    }

    /**
     * Sends one event. The send is asynchronous and its result is not inspected.
     *
     * @param eventType  event name written into the JSON payload
     * @param followerId id of the following user
     * @param followeeId id of the followed user
     */
    public void publishFollowingEvent(String eventType, Long followerId, Long followeeId) {
        ProducerRecord<String, String> record = new ProducerRecord<>(
            "following-events",
            followerId + ":" + followeeId,
            String.format("{\"eventType\":\"%s\",\"followerId\":%d,\"followeeId\":%d}",
                          eventType, followerId, followeeId));
        producer.send(record);
    }

    /** Closes the underlying producer. */
    public void close() {
        producer.close();
    }
}
// 然后有独立的消费者服务从Kafka消费并更新缓存
2. 处理初始数据同步
            
            
              java
              
              
            
          
          // 在服务启动时,先全量同步Following表数据到缓存
/**
 * Full synchronisation: loads every Following relation from the database and
 * writes it into the Redis followings/followers sets.
 *
 * <p>Commands are pipelined, but the pipeline is synced in bounded batches.
 * While a pipeline is open the replies are queued, so sending the whole table
 * as a single pipeline makes memory use grow with the table size.
 */
public void initialSync() {
    // Upper bound of queued commands between two sync() calls.
    final int maxQueuedCommands = 10_000;

    // NOTE(review): findAll() still materialises the whole table in memory;
    // consider paging if the repository supports it.
    List<Following> allFollowings = followingRepository.findAll();

    try (Jedis jedis = jedisPool.getResource()) {
        Pipeline pipeline = jedis.pipelined();
        int queued = 0;

        for (Following following : allFollowings) {
            pipeline.sadd("user:" + following.getFollowerId() + ":followings",
                        following.getFolloweeId().toString());
            pipeline.sadd("user:" + following.getFolloweeId() + ":followers",
                        following.getFollowerId().toString());
            queued += 2;

            if (queued >= maxQueuedCommands) {
                pipeline.sync();
                queued = 0;
            }
        }

        // Flush whatever is left from the last partial batch.
        pipeline.sync();
    }
}
3. 监控与容错
- 记录binlog位置,以便重启后从正确位置继续
- 实现重试机制处理缓存更新失败
- 添加监控指标跟踪事件处理延迟和错误率
总结
这个方案实现了Following表的伪从技术,通过监听MySQL binlog实时捕获数据变更,并更新Redis缓存。这种架构具有以下优点:
- 低延迟:几乎实时同步数据库变更
- 解耦:消费者服务独立于主业务服务
- 可扩展:可以轻松添加更多消费者处理不同业务逻辑
- 高性能:Redis提供了高效的关系数据存储和查询
根据业务规模,可以选择简单的直接更新缓存方案,或者引入消息队列的更复杂架构。
经过对数据库设计、缓存设计的详细论证,总结并提炼出缓存与数据库结合的最终方案。
伪从方案应用场景如:用户关系服务,关注与取消关注的接口。
- 即接口直接更新数据库Following表即响应用户,后续流程对用户来说是完全异步的。
- Follower表、计数服务、Redis缓存会依赖Following表产生的binlog日志分别更新数据。
关于Binlog监听在服务重启/暂停时的数据丢失问题
Binlog监听在服务重启或暂停时是否会导致数据丢失,取决于具体的实现方式和配置。下面我将详细分析这个问题及解决方案。
关键影响因素
1. Binlog位置记录
- 不记录位置:如果服务没有记录已处理的binlog位置,重启后会从当前最新的binlog位置开始,导致中间变更丢失
- 记录位置:正确记录binlog位置可以确保重启后从断点继续
2. MySQL binlog保留策略
- expire_logs_days参数决定binlog保留天数
- 如果binlog被过早清除,而服务长时间停机,可能导致无法恢复
3. 事务完整性
- 部分处理的事务在重启后可能导致不一致
解决方案
1. 持久化binlog位置
修改之前的消费者服务,增加位置记录功能:
            
            
              java
              
              
            
          
          /**
 * Excerpt of the consumer extended with binlog position tracking. The "..." in
 * the constructor stands for the parameters and setup shown earlier.
 */
public class FollowingTableBinlogConsumer {
    // Store used to persist and restore the binlog coordinates
    private final BinlogPositionStore positionStore;

    public FollowingTableBinlogConsumer(..., BinlogPositionStore positionStore) {
        this.positionStore = positionStore;

        // Resume from the stored binlog file name and offset, if any
        BinlogPosition position = positionStore.getPosition();
        if (position != null) {
            client.setBinlogFilename(position.getFilename());
            client.setBinlogPosition(position.getPosition());
        }

        client.registerEventListener(event -> {
            // handle the event ...

            // then record how far we got
            recordPosition(event);
        });
    }

    /**
     * Persists the coordinates to resume from after {@code event}.
     *
     * <ul>
     *   <li>ROTATE: the event data names the next file and offset.</li>
     *   <li>TABLE_MAP is skipped: resuming right after it would deliver the
     *       following row events without their table mapping.</li>
     *   <li>FORMAT_DESCRIPTION is skipped, as before.</li>
     *   <li>Other events: the next position is only available on
     *       {@code EventHeaderV4}; non-positive values are ignored.</li>
     * </ul>
     */
    private void recordPosition(Event event) {
        EventHeader header = event.getHeader();
        EventType type = header.getEventType();

        if (type == EventType.ROTATE) {
            RotateEventData rotateEvent = (RotateEventData) event.getData();
            positionStore.savePosition(
                new BinlogPosition(rotateEvent.getBinlogFilename(), rotateEvent.getBinlogPosition())
            );
            return;
        }

        if (type == EventType.TABLE_MAP || type == EventType.FORMAT_DESCRIPTION) {
            return;
        }

        if (header instanceof EventHeaderV4) {
            long nextPosition = ((EventHeaderV4) header).getNextPosition();
            if (nextPosition > 0) {
                positionStore.savePosition(
                    new BinlogPosition(client.getBinlogFilename(), nextPosition)
                );
            }
        }
    }
}
// Binlog位置存储接口
/**
 * Persists the binlog coordinates (file name + offset) the consumer has
 * processed, so that it can resume after a restart.
 */
public interface BinlogPositionStore {
    /** Stores {@code position} as the latest processed coordinates. */
    void savePosition(BinlogPosition position);

    /** Returns the last stored coordinates, or {@code null} if none are stored. */
    BinlogPosition getPosition();

    /**
     * Discards the stored coordinates so that {@link #getPosition()} returns
     * {@code null}. Called by the startup check after a full resync. Optional
     * operation: the default throws, stores that support it override it.
     *
     * @throws UnsupportedOperationException if the store cannot clear its state
     */
    default void clearPosition() {
        throw new UnsupportedOperationException("clearPosition is not supported by this store");
    }

    /**
     * Forces any buffered coordinates to durable storage. Called on shutdown.
     * The default does nothing, which is correct for stores that write through.
     */
    default void flush() {
        // nothing buffered by default
    }
}
// 简单的文件存储实现
/**
 * {@link BinlogPositionStore} that keeps the position in a single local file
 * using Java serialization.
 *
 * <p>Writes go to a temporary sibling file that is then moved over the real
 * one. Overwriting the file in place could leave a truncated, unreadable
 * position file if the process dies in the middle of a write.
 */
public class FileBinlogPositionStore implements BinlogPositionStore {
    private final File positionFile;

    /** @param filePath path of the file that holds the serialized position */
    public FileBinlogPositionStore(String filePath) {
        this.positionFile = new File(filePath);
    }

    /**
     * Writes {@code position} to disk, replacing the previous value.
     *
     * @throws RuntimeException if the file cannot be written or replaced
     */
    @Override
    public void savePosition(BinlogPosition position) {
        File tempFile = new File(positionFile.getPath() + ".tmp");
        try {
            try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(tempFile))) {
                out.writeObject(position);
            }
            replacePositionFile(tempFile);
        } catch (IOException e) {
            throw new RuntimeException("Failed to save binlog position", e);
        }
    }

    /** Moves {@code tempFile} over the position file, atomically where supported. */
    private void replacePositionFile(File tempFile) throws IOException {
        try {
            java.nio.file.Files.move(tempFile.toPath(), positionFile.toPath(),
                java.nio.file.StandardCopyOption.ATOMIC_MOVE);
        } catch (java.nio.file.AtomicMoveNotSupportedException e) {
            // File system cannot do an atomic rename; fall back to a plain replace.
            java.nio.file.Files.move(tempFile.toPath(), positionFile.toPath(),
                java.nio.file.StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Reads the stored position.
     *
     * @return the stored position, or {@code null} if the file does not exist
     * @throws RuntimeException if the file exists but cannot be read
     */
    @Override
    public BinlogPosition getPosition() {
        if (!positionFile.exists()) {
            return null;
        }

        // The file is written only by this class; never point this at untrusted data,
        // because Java deserialization of untrusted input is unsafe.
        try (ObjectInputStream in = new ObjectInputStream(new FileInputStream(positionFile))) {
            return (BinlogPosition) in.readObject();
        } catch (IOException | ClassNotFoundException e) {
            throw new RuntimeException("Failed to read binlog position", e);
        }
    }

    /**
     * Deletes the position file so that {@link #getPosition()} returns
     * {@code null}; used after a full resync.
     *
     * @throws RuntimeException if the file exists but cannot be deleted
     */
    public void clearPosition() {
        try {
            java.nio.file.Files.deleteIfExists(positionFile.toPath());
        } catch (IOException e) {
            throw new RuntimeException("Failed to clear binlog position", e);
        }
    }
}
// Binlog位置对象
/**
 * Immutable binlog coordinates: the binlog file name and the byte offset
 * inside that file.
 */
public class BinlogPosition implements Serializable {
    private static final long serialVersionUID = 1L;

    private final String filename;
    private final long position;

    /**
     * @param filename binlog file name
     * @param position byte offset inside {@code filename}
     */
    public BinlogPosition(String filename, long position) {
        this.filename = filename;
        this.position = position;
    }

    /** Returns the binlog file name. */
    public String getFilename() {
        return filename;
    }

    /** Returns the byte offset inside the binlog file. */
    public long getPosition() {
        return position;
    }

    @Override
    public String toString() {
        return filename + ":" + position;
    }
}
2. MySQL配置优化
确保MySQL配置合理:
            
            
              sql
              
              
            
          
          -- 设置足够的binlog保留时间(根据业务需求调整)
SET GLOBAL expire_logs_days = 7;
-- 或使用新的变量(MySQL 8.0+)
SET GLOBAL binlog_expire_logs_seconds = 604800;  -- 7天
3. 启动时数据校验和修复
服务启动时增加校验逻辑:
            
            
              java
              
              
            
          
          /**
 * Validates the stored binlog position, falls back to a full resync when the
 * referenced binlog file no longer exists on the server, then connects.
 *
 * @throws RuntimeException if the connection cannot be established
 */
public void start() {
    BinlogPosition position = positionStore.getPosition();
    if (position != null && !isBinlogFileExists(position.getFilename())) {
        // The stored file was purged: incremental catch-up is impossible.
        initialSync();
        positionStore.clearPosition();

        // The client was configured with the stale coordinates earlier; a null
        // file name makes it resolve the server's current position on connect.
        // NOTE(review): changes committed between the resync read and the
        // connect are not replayed; capture the server position before the
        // resync if that window matters.
        client.setBinlogFilename(null);
    }

    try {
        client.connect();
    } catch (IOException e) {
        throw new RuntimeException("Failed to connect to MySQL binlog", e);
    }
}
/**
 * Checks whether the server still has the given binlog file, by looking for it
 * in the output of {@code SHOW BINARY LOGS}.
 *
 * <p>NOTE(review): assumes the {@code jdbcTemplate} used by checkpoint() is
 * available in this class, is connected to the MySQL server being replicated,
 * and that its account may run SHOW BINARY LOGS. Confirm before use.
 *
 * @param filename binlog file name taken from the stored position
 * @return {@code true} if the server lists a binlog file with that name
 */
private boolean isBinlogFileExists(String filename) {
    for (java.util.Map<String, Object> row : jdbcTemplate.queryForList("SHOW BINARY LOGS")) {
        // The first column of SHOW BINARY LOGS is Log_name.
        if (filename.equals(row.get("Log_name"))) {
            return true;
        }
    }
    return false;
}
4. 优雅停机处理
确保服务停止时正确处理:
            
            
              java
              
              
            
          
          /**
 * Stops the consumer: disconnects from MySQL, then flushes the position store.
 * The flush runs even when the disconnect fails, so the last processed position
 * is not lost because of a network error during shutdown.
 */
public void stop() {
    try {
        client.disconnect();
    } catch (IOException e) {
        // Best effort during shutdown: report instead of silently dropping the error.
        System.err.println("Failed to disconnect from MySQL binlog: " + e);
    } finally {
        // Make sure the last position reaches durable storage
        positionStore.flush();
    }
}
高级保障方案
1. 引入事务表记录处理状态
创建一张事务记录表:
            
            
              sql
              
              
            
          
          -- One row per consumer: the binlog coordinates it last checkpointed.
-- Column order is significant for INSERT statements that omit the column list.
CREATE TABLE binlog_consumer_state (
    consumer_id        VARCHAR(100) NOT NULL,  -- consumer identifier
    binlog_filename    VARCHAR(100) NOT NULL,  -- binlog file name
    binlog_position    BIGINT       NOT NULL,  -- offset inside that file
    last_heartbeat     TIMESTAMP    NOT NULL,  -- time of the last checkpoint
    processed_checksum VARCHAR(100),           -- optional checksum of processed data
    PRIMARY KEY (consumer_id)
);
2. 定期检查点(checkpoint)
            
            
              java
              
              
            
          
          // 每处理N个事件或每隔M秒记录一次完整状态
/**
 * Writes a checkpoint row for this consumer: current binlog file, the position
 * following {@code event}, a heartbeat timestamp and a checksum.
 *
 * <p>Events whose header does not expose a positive next position are skipped,
 * so a checkpoint never points to an offset that cannot be resumed from.
 *
 * @param event the event that has just been processed
 */
private void checkpoint(Event event) {
    // getNextPosition() is declared on EventHeaderV4, not on the EventHeader interface.
    EventHeader header = event.getHeader();
    if (!(header instanceof EventHeaderV4)) {
        return;
    }
    long nextPosition = ((EventHeaderV4) header).getNextPosition();
    if (nextPosition <= 0) {
        return;
    }

    String binlogFilename = client.getBinlogFilename();
    // Checksum of the data processed so far
    String checksum = computeChecksum();

    // Explicit column list: the statement keeps working if the table gains or
    // reorders columns.
    jdbcTemplate.update(
        "INSERT INTO binlog_consumer_state " +
        "(consumer_id, binlog_filename, binlog_position, last_heartbeat, processed_checksum) " +
        "VALUES (?, ?, ?, NOW(), ?) " +
        "ON DUPLICATE KEY UPDATE binlog_filename=?, binlog_position=?, last_heartbeat=NOW(), processed_checksum=?",
        consumerId, binlogFilename, nextPosition, checksum,
        binlogFilename, nextPosition, checksum
    );
}
3. 数据修复机制
当检测到不一致时:
            
            
              java
              
              
            
          
          /**
 * Compares the last processed state recorded in the database with the one
 * recorded in the cache and runs the repair routine when they differ.
 * Does nothing when the database holds no state yet (e.g. before the first
 * checkpoint), instead of failing with a NullPointerException.
 */
public void repairIfNeeded() {
    // Last processed state according to the database
    StateRecord state = getLastStateFromDB();
    if (state == null) {
        return;
    }

    // Last processed state according to the cache
    StateRecord cacheState = getLastStateFromCache();

    if (!state.equals(cacheState)) {
        executeRepair(state);
    }
}
总结
正确实现的Binlog监听服务在重启/暂停时不会丢失数据,但需要:
- 持久化记录binlog位置(文件名+偏移量)
- 配置足够的binlog保留时间
- 实现优雅的停机和恢复机制
- 考虑增加校验和修复逻辑(针对关键业务)
建议的完整方案:
- 使用混合位置存储(本地文件+数据库)
- 定期检查点
- 启动时数据校验
- 足够的binlog保留期
- 监控binlog消费延迟
这样即使在服务重启、暂停后,也能从已保存的位置继续消费而不丢失变更;若停机时间超过binlog保留期、断点对应的binlog已被清除,则无法增量恢复,需要先执行一次全量同步,再从当前位置继续监听。