Implementing a Pseudo-Slave: Binlog-Based Change Capture for the Following Table with Cache Updates
Solution Overview
To implement a dedicated consumer service that acts as a pseudo-slave of the Following table, subscribing to the binlog and updating the cache whenever the data changes, the following approach can be used:
Main Components
- MySQL binlog listener: use an open-source library to listen to the MySQL binlog
- Message queue: publish change events to a message queue (optional)
- Consumer service: process change events and update the cache
- Cache system: Redis or another caching solution
Implementation Steps
1. Configure MySQL Binlog
First, make sure the binlog is enabled on MySQL and set to ROW format:
```sql
-- Check the current binlog configuration
SHOW VARIABLES LIKE 'log_bin';
SHOW VARIABLES LIKE 'binlog_format';
```

```ini
# my.cnf / my.ini
[mysqld]
log-bin=mysql-bin
binlog-format=ROW
server-id=1
```
2. Listening to the Binlog in Java
The open-source mysql-binlog-connector-java library can be used:
```xml
<!-- pom.xml dependency -->
<dependency>
    <groupId>com.github.shyiko</groupId>
    <artifactId>mysql-binlog-connector-java</artifactId>
    <version>0.25.4</version>
</dependency>
```
3. Consumer Service Implementation
```java
import com.github.shyiko.mysql.binlog.BinaryLogClient;
import com.github.shyiko.mysql.binlog.event.*;

import java.io.IOException;
import java.io.Serializable;
import java.util.Map;

public class FollowingTableBinlogConsumer {

    private final BinaryLogClient client;
    private final CacheService cacheService;
    // Table id assigned to the Following table by the most recent TABLE_MAP event;
    // row events carry this id, so it is used to filter out events from other tables.
    private volatile long followingTableId = -1;

    public FollowingTableBinlogConsumer(String hostname, int port, String username, String password,
                                        CacheService cacheService) {
        this.cacheService = cacheService;
        this.client = new BinaryLogClient(hostname, port, username, password);

        client.registerEventListener(event -> {
            EventData data = event.getData();
            if (data instanceof TableMapEventData) {
                // Table map event: remember the table id of the Following table
                TableMapEventData tableMapEvent = (TableMapEventData) data;
                if ("your_database".equals(tableMapEvent.getDatabase()) &&
                        "Following".equals(tableMapEvent.getTable())) {
                    followingTableId = tableMapEvent.getTableId();
                }
            } else if (data instanceof WriteRowsEventData) {
                // Insert (follow)
                WriteRowsEventData writeData = (WriteRowsEventData) data;
                if (writeData.getTableId() == followingTableId) {
                    processWriteEvent(writeData);
                }
            } else if (data instanceof UpdateRowsEventData) {
                // Update
                UpdateRowsEventData updateData = (UpdateRowsEventData) data;
                if (updateData.getTableId() == followingTableId) {
                    processUpdateEvent(updateData);
                }
            } else if (data instanceof DeleteRowsEventData) {
                // Delete (unfollow)
                DeleteRowsEventData deleteData = (DeleteRowsEventData) data;
                if (deleteData.getTableId() == followingTableId) {
                    processDeleteEvent(deleteData);
                }
            }
        });
    }

    private void processWriteEvent(WriteRowsEventData data) {
        // Handle new "follow" rows
        for (Serializable[] row : data.getRows()) {
            Long followerId = ((Number) row[0]).longValue(); // assuming column 0 is follower_id
            Long followeeId = ((Number) row[1]).longValue(); // assuming column 1 is followee_id
            cacheService.addFollowing(followerId, followeeId);
        }
    }

    private void processUpdateEvent(UpdateRowsEventData data) {
        // Handle updates (only relevant if rows in the Following table are ever updated in place)
        for (Map.Entry<Serializable[], Serializable[]> row : data.getRows()) {
            Serializable[] before = row.getKey();
            Serializable[] after = row.getValue();
            // Apply business-specific handling here
        }
    }

    private void processDeleteEvent(DeleteRowsEventData data) {
        // Handle "unfollow" rows
        for (Serializable[] row : data.getRows()) {
            Long followerId = ((Number) row[0]).longValue();
            Long followeeId = ((Number) row[1]).longValue();
            cacheService.removeFollowing(followerId, followeeId);
        }
    }

    public void start() {
        try {
            client.connect();
        } catch (IOException e) {
            throw new RuntimeException("Failed to connect to MySQL binlog", e);
        }
    }

    public void stop() {
        try {
            client.disconnect();
        } catch (IOException e) {
            // log and ignore on shutdown
        }
    }
}
```
4. Cache Service Implementation
```java
import java.util.Set;
import java.util.stream.Collectors;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

public interface CacheService {
    void addFollowing(Long followerId, Long followeeId);
    void removeFollowing(Long followerId, Long followeeId);
    Set<Long> getFollowings(Long followerId);
    Set<Long> getFollowers(Long followeeId);
}

public class RedisCacheService implements CacheService {

    private final JedisPool jedisPool;

    public RedisCacheService(JedisPool jedisPool) {
        this.jedisPool = jedisPool;
    }

    @Override
    public void addFollowing(Long followerId, Long followeeId) {
        try (Jedis jedis = jedisPool.getResource()) {
            // The user's following list
            jedis.sadd("user:" + followerId + ":followings", followeeId.toString());
            // The followee's follower list
            jedis.sadd("user:" + followeeId + ":followers", followerId.toString());
        }
    }

    @Override
    public void removeFollowing(Long followerId, Long followeeId) {
        try (Jedis jedis = jedisPool.getResource()) {
            // The user's following list
            jedis.srem("user:" + followerId + ":followings", followeeId.toString());
            // The followee's follower list
            jedis.srem("user:" + followeeId + ":followers", followerId.toString());
        }
    }

    @Override
    public Set<Long> getFollowings(Long followerId) {
        try (Jedis jedis = jedisPool.getResource()) {
            Set<String> followings = jedis.smembers("user:" + followerId + ":followings");
            return followings.stream().map(Long::valueOf).collect(Collectors.toSet());
        }
    }

    @Override
    public Set<Long> getFollowers(Long followeeId) {
        try (Jedis jedis = jedisPool.getResource()) {
            Set<String> followers = jedis.smembers("user:" + followeeId + ":followers");
            return followers.stream().map(Long::valueOf).collect(Collectors.toSet());
        }
    }
}
```
5. Starting the Service
```java
import redis.clients.jedis.JedisPool;

public class Application {
    public static void main(String[] args) {
        // Configure the Redis connection pool
        JedisPool jedisPool = new JedisPool("localhost", 6379);
        CacheService cacheService = new RedisCacheService(jedisPool);

        // Start the binlog consumer
        FollowingTableBinlogConsumer consumer = new FollowingTableBinlogConsumer(
                "localhost", 3306, "username", "password", cacheService);
        consumer.start();

        // Register a shutdown hook
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            consumer.stop();
            jedisPool.close();
        }));
    }
}
```
Advanced Optimizations
1. Introduce a Message Queue (e.g. Kafka)
```java
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

// Inside the binlog consumer, publish change events to Kafka instead of
// (or in addition to) updating the cache directly.
public class KafkaEventPublisher {

    private final Producer<String, String> producer;

    public KafkaEventPublisher(String bootstrapServers) {
        Properties props = new Properties();
        props.put("bootstrap.servers", bootstrapServers);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        this.producer = new KafkaProducer<>(props);
    }

    public void publishFollowingEvent(String eventType, Long followerId, Long followeeId) {
        String key = followerId + ":" + followeeId;
        String value = String.format("{\"eventType\":\"%s\",\"followerId\":%d,\"followeeId\":%d}",
                eventType, followerId, followeeId);
        producer.send(new ProducerRecord<>("following-events", key, value));
    }

    public void close() {
        producer.close();
    }
}
```
A separate consumer service then reads these events from Kafka and updates the cache; a sketch follows.
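A minimal sketch of that downstream consumer, assuming the `following-events` topic and JSON payload produced above, Jackson for parsing, and illustrative choices for the event-type strings ("INSERT"/"DELETE"), the consumer group id and the class name:
```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class FollowingEventKafkaConsumer {

    private final KafkaConsumer<String, String> consumer;
    private final CacheService cacheService;
    private final ObjectMapper mapper = new ObjectMapper();

    public FollowingEventKafkaConsumer(String bootstrapServers, CacheService cacheService) {
        Properties props = new Properties();
        props.put("bootstrap.servers", bootstrapServers);
        props.put("group.id", "following-cache-updater"); // illustrative group id
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        this.consumer = new KafkaConsumer<>(props);
        this.cacheService = cacheService;
    }

    public void run() throws Exception {
        consumer.subscribe(Collections.singletonList("following-events"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
            for (ConsumerRecord<String, String> record : records) {
                JsonNode event = mapper.readTree(record.value());
                String eventType = event.get("eventType").asText();
                long followerId = event.get("followerId").asLong();
                long followeeId = event.get("followeeId").asLong();
                // The event-type strings must match whatever the publisher passes in
                if ("INSERT".equals(eventType)) {
                    cacheService.addFollowing(followerId, followeeId);
                } else if ("DELETE".equals(eventType)) {
                    cacheService.removeFollowing(followerId, followeeId);
                }
            }
        }
    }
}
```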
2. Handling the Initial Data Sync
```java
// On service start-up, first do a full sync of the Following table into the cache.
// followingRepository and the Following entity stand for the application's existing data-access layer.
public void initialSync() {
    // Read all Following relationships from the database
    List<Following> allFollowings = followingRepository.findAll();

    // Write them to the cache in bulk via a pipeline
    try (Jedis jedis = jedisPool.getResource()) {
        Pipeline pipeline = jedis.pipelined();
        for (Following following : allFollowings) {
            pipeline.sadd("user:" + following.getFollowerId() + ":followings",
                    following.getFolloweeId().toString());
            pipeline.sadd("user:" + following.getFolloweeId() + ":followers",
                    following.getFollowerId().toString());
        }
        pipeline.sync();
    }
}
```
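To avoid losing changes made while the full sync is running, one approach is to capture the current binlog coordinates before scanning the table and start the binlog consumer from that point afterwards; because SADD/SREM are idempotent, replaying a change the scan already picked up is harmless. A minimal sketch under these assumptions, using a javax.sql.DataSource to the source MySQL and reusing the BinlogPosition value object introduced in the position-store section further below:
```java
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

import javax.sql.DataSource;

public class InitialSyncCoordinator {

    private final DataSource dataSource;

    public InitialSyncCoordinator(DataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Capture the current binlog coordinates, run the full sync, and return the
     * coordinates so the binlog consumer can be started from them.
     */
    public BinlogPosition syncAndCaptureStartPosition(Runnable initialSync) throws Exception {
        BinlogPosition start;
        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SHOW MASTER STATUS")) {
            rs.next();
            // Columns "File" and "Position" give the current binlog file name and offset
            start = new BinlogPosition(rs.getString("File"), rs.getLong("Position"));
        }
        initialSync.run(); // the full table scan into Redis shown above
        return start;      // feed into client.setBinlogFilename()/setBinlogPosition()
    }
}
```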
3. Monitoring and Fault Tolerance
- Record the binlog position so the service can resume from the right point after a restart
- Implement a retry mechanism for failed cache updates (a sketch follows this list)
- Add monitoring metrics to track event-processing latency and error rates
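A minimal sketch of such a retry mechanism, wrapping the CacheService defined earlier; the class name, attempt count and linear backoff are illustrative choices rather than part of the original design:
```java
import java.util.Set;
import java.util.concurrent.TimeUnit;

public class RetryingCacheService implements CacheService {

    private final CacheService delegate;
    private final int maxAttempts; // assumed to be >= 1

    public RetryingCacheService(CacheService delegate, int maxAttempts) {
        this.delegate = delegate;
        this.maxAttempts = maxAttempts;
    }

    @Override
    public void addFollowing(Long followerId, Long followeeId) {
        withRetry(() -> delegate.addFollowing(followerId, followeeId));
    }

    @Override
    public void removeFollowing(Long followerId, Long followeeId) {
        withRetry(() -> delegate.removeFollowing(followerId, followeeId));
    }

    @Override
    public Set<Long> getFollowings(Long followerId) {
        return delegate.getFollowings(followerId);
    }

    @Override
    public Set<Long> getFollowers(Long followeeId) {
        return delegate.getFollowers(followeeId);
    }

    private void withRetry(Runnable op) {
        RuntimeException last = null;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                op.run();
                return;
            } catch (RuntimeException e) {
                last = e;
                try {
                    TimeUnit.MILLISECONDS.sleep(100L * attempt); // simple linear backoff
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        // After exhausting retries, surface the failure so it can be logged and alerted on
        throw last;
    }
}
```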
Summary
This design implements a pseudo-slave of the Following table: it captures data changes in near real time by listening to the MySQL binlog and applies them to the Redis cache. The architecture has the following advantages:
- Low latency: database changes are propagated almost in real time
- Decoupling: the consumer service is independent of the main business service
- Extensibility: additional consumers with different business logic can be added easily
- High performance: Redis provides efficient storage and querying of relationship data
Depending on the scale of the business, you can choose the simpler direct-cache-update design or the more elaborate architecture with a message queue.
After the detailed discussion of database design and cache design, this pseudo-slave approach is the distilled final scheme for combining the cache with the database.
A typical application scenario is a user-relationship service with follow and unfollow endpoints (a sketch of such an endpoint follows this list):
- The endpoint only writes to the Following table in the database and then responds to the user; everything after that is completely asynchronous from the user's point of view.
- The Follower table, the counting service and the Redis cache are each updated from the binlog produced by the Following table.
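A minimal sketch of that write path, assuming a hypothetical FollowingRepository that wraps the SQL insert/delete on the Following table; the endpoint only touches the database, and counters, the Follower view and the Redis cache catch up asynchronously via the binlog consumers described above:
```java
public class FollowService {

    // Hypothetical data-access interface over the Following table
    private final FollowingRepository followingRepository;

    public FollowService(FollowingRepository followingRepository) {
        this.followingRepository = followingRepository;
    }

    /** Follow: a single INSERT into Following, then respond to the user immediately. */
    public void follow(long followerId, long followeeId) {
        followingRepository.insert(followerId, followeeId);
    }

    /** Unfollow: a single DELETE from Following, then respond to the user immediately. */
    public void unfollow(long followerId, long followeeId) {
        followingRepository.delete(followerId, followeeId);
    }
}
```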
Does Binlog Listening Lose Data on Service Restart or Pause?
Whether a binlog listener loses data when the service is restarted or paused depends on how it is implemented and configured. The key factors and the corresponding solutions are analysed below.
Key Factors
1. Binlog position tracking
- Position not recorded: if the service does not record the binlog position it has processed, it will start from the latest position after a restart, and every change in between is lost
- Position recorded: persisting the binlog position correctly lets the service resume from the exact point where it stopped
2. MySQL binlog retention policy
- The expire_logs_days parameter controls how many days of binlog are kept
- If the binlog is purged too early while the service is down for a long time, the missed changes can no longer be recovered
3. Transaction integrity
- A transaction that was only partially processed before the restart can leave the cache inconsistent
Solutions
1. Persist the Binlog Position
Extend the consumer service from earlier with position tracking:
```java
import com.github.shyiko.mysql.binlog.event.*;

import java.io.*;

public class FollowingTableBinlogConsumer {

    // Store for the last processed binlog position
    private final BinlogPositionStore positionStore;

    public FollowingTableBinlogConsumer(String hostname, int port, String username, String password,
                                        CacheService cacheService, BinlogPositionStore positionStore) {
        this.positionStore = positionStore;
        // ... client and cacheService are set up exactly as before ...

        // Resume from the persisted binlog file name and offset, if any
        BinlogPosition position = positionStore.getPosition();
        if (position != null) {
            client.setBinlogFilename(position.getFilename());
            client.setBinlogPosition(position.getPosition());
        }

        client.registerEventListener(event -> {
            // ... handle the event as before ...

            // Record the position we have reached
            EventHeaderV4 header = event.getHeader();
            if (header.getEventType() == EventType.ROTATE) {
                RotateEventData rotateEvent = (RotateEventData) event.getData();
                positionStore.savePosition(
                        new BinlogPosition(rotateEvent.getBinlogFilename(), rotateEvent.getBinlogPosition()));
            } else if (header.getEventType() != EventType.FORMAT_DESCRIPTION) {
                positionStore.savePosition(
                        new BinlogPosition(client.getBinlogFilename(), header.getNextPosition()));
            }
        });
    }
}

// Binlog position store abstraction
public interface BinlogPositionStore {
    void savePosition(BinlogPosition position);
    BinlogPosition getPosition();
    void clearPosition();   // used when falling back to a full resync (see section 3 below)
    void flush();           // make sure the last position reaches durable storage on shutdown
}

// Simple file-based implementation
public class FileBinlogPositionStore implements BinlogPositionStore {

    private final File positionFile;

    public FileBinlogPositionStore(String filePath) {
        this.positionFile = new File(filePath);
    }

    @Override
    public void savePosition(BinlogPosition position) {
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(positionFile))) {
            out.writeObject(position);
        } catch (IOException e) {
            throw new RuntimeException("Failed to save binlog position", e);
        }
    }

    @Override
    public BinlogPosition getPosition() {
        if (!positionFile.exists()) return null;
        try (ObjectInputStream in = new ObjectInputStream(new FileInputStream(positionFile))) {
            return (BinlogPosition) in.readObject();
        } catch (Exception e) {
            throw new RuntimeException("Failed to read binlog position", e);
        }
    }

    @Override
    public void clearPosition() {
        positionFile.delete();
    }

    @Override
    public void flush() {
        // savePosition() writes through to disk on every call, so there is nothing extra to do here
    }
}

// Binlog position value object
public class BinlogPosition implements Serializable {

    private final String filename;
    private final long position;

    public BinlogPosition(String filename, long position) {
        this.filename = filename;
        this.position = position;
    }

    public String getFilename() { return filename; }
    public long getPosition() { return position; }
}
```
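Wiring the file-based store into the start-up code from step 5 might then look like the sketch below; the file path is an illustrative choice:
```java
import redis.clients.jedis.JedisPool;

public class Application {
    public static void main(String[] args) {
        JedisPool jedisPool = new JedisPool("localhost", 6379);
        CacheService cacheService = new RedisCacheService(jedisPool);

        // Persist the binlog offset on local disk so it survives restarts of the consumer
        BinlogPositionStore positionStore =
                new FileBinlogPositionStore("/var/lib/following-consumer/binlog.pos");

        FollowingTableBinlogConsumer consumer = new FollowingTableBinlogConsumer(
                "localhost", 3306, "username", "password", cacheService, positionStore);
        consumer.start();

        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            consumer.stop();   // disconnects and flushes the last processed position
            jedisPool.close();
        }));
    }
}
```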
2. MySQL Configuration
Make sure the MySQL retention settings are reasonable:
```sql
-- Keep the binlog long enough for your recovery window (adjust to business needs)
SET GLOBAL expire_logs_days = 7;
-- Or use the newer variable (MySQL 8.0+)
SET GLOBAL binlog_expire_logs_seconds = 604800; -- 7 days
```
3. Validation and Repair on Start-up
Add a validation step when the service starts:
```java
public void start() {
    // Check whether the recorded binlog position is still valid
    BinlogPosition position = positionStore.getPosition();
    if (position != null && !isBinlogFileExists(position.getFilename())) {
        // The binlog file has already been purged: fall back to a full resync
        initialSync();
        positionStore.clearPosition();
    }
    try {
        client.connect();
    } catch (IOException e) {
        throw new RuntimeException("Failed to connect to MySQL binlog", e);
    }
}

private boolean isBinlogFileExists(String filename) {
    // Check whether this binlog file still exists on the server.
    // The list of available files can be obtained with SHOW BINARY LOGS;
    // a JDBC-based sketch of this check follows below.
    throw new UnsupportedOperationException("not implemented here");
}
```
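One possible implementation of that check, assuming the consumer is also given a javax.sql.DataSource (an extra field not shown in the class above) pointing at the same MySQL instance, and that the account is allowed to run SHOW BINARY LOGS:
```java
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

// Assumed extra field on the consumer: private final javax.sql.DataSource dataSource;
private boolean isBinlogFileExists(String filename) {
    try (Connection conn = dataSource.getConnection();
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery("SHOW BINARY LOGS")) {
        while (rs.next()) {
            // The first column of SHOW BINARY LOGS is Log_name, e.g. "mysql-bin.000042"
            if (filename.equals(rs.getString("Log_name"))) {
                return true;
            }
        }
        return false;
    } catch (SQLException e) {
        // If the check itself fails, be conservative and treat the position as invalid
        return false;
    }
}
```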
4. Graceful Shutdown
Make sure shutdown is handled correctly:
```java
public void stop() {
    try {
        // Let the current event finish, then disconnect
        client.disconnect();
        // Make sure the last recorded position has been persisted
        positionStore.flush();
    } catch (IOException e) {
        // log and ignore on shutdown
    }
}
```
Further Safeguards
1. A State Table to Record Processing Progress
Create a consumer-state table:
```sql
CREATE TABLE binlog_consumer_state (
    consumer_id VARCHAR(100) PRIMARY KEY,
    binlog_filename VARCHAR(100) NOT NULL,
    binlog_position BIGINT NOT NULL,
    last_heartbeat TIMESTAMP NOT NULL,
    processed_checksum VARCHAR(100)
);
```
2. Periodic Checkpoints
```java
// Record the full state every N events or every M seconds.
// consumerId, jdbcTemplate and computeChecksum() belong to the surrounding consumer class.
private void checkpoint(Event event) {
    // Compute a checksum over the data processed so far
    String checksum = computeChecksum();
    EventHeaderV4 header = event.getHeader();

    // Upsert the consumer-state row
    jdbcTemplate.update(
            "INSERT INTO binlog_consumer_state VALUES (?, ?, ?, NOW(), ?) " +
            "ON DUPLICATE KEY UPDATE binlog_filename=?, binlog_position=?, last_heartbeat=NOW(), processed_checksum=?",
            consumerId, client.getBinlogFilename(), header.getNextPosition(), checksum,
            client.getBinlogFilename(), header.getNextPosition(), checksum
    );
}
```
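A sketch of the "every N events or every M seconds" trigger around checkpoint(); the thresholds are illustrative, not prescribed values, and the fields would live on the same consumer class:
```java
// Checkpoint every N events or every M milliseconds, whichever comes first.
private static final int CHECKPOINT_EVERY_N_EVENTS = 1000;
private static final long CHECKPOINT_EVERY_MS = 5_000;

private int eventsSinceCheckpoint = 0;
private long lastCheckpointAt = System.currentTimeMillis();

private void maybeCheckpoint(Event event) {
    eventsSinceCheckpoint++;
    long now = System.currentTimeMillis();
    if (eventsSinceCheckpoint >= CHECKPOINT_EVERY_N_EVENTS
            || now - lastCheckpointAt >= CHECKPOINT_EVERY_MS) {
        checkpoint(event);           // the method shown above
        eventsSinceCheckpoint = 0;
        lastCheckpointAt = now;
    }
}
```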
3. Data Repair
When an inconsistency is detected:
```java
// StateRecord, getLastStateFromDB(), getLastStateFromCache() and executeRepair()
// are placeholders for application-specific logic.
public void repairIfNeeded() {
    // Last processed state according to the database
    StateRecord state = getLastStateFromDB();
    // Last processed state according to the cache
    StateRecord cacheState = getLastStateFromCache();
    if (!state.equals(cacheState)) {
        // Re-sync the affected data
        executeRepair(state);
    }
}
```
Summary
A correctly implemented binlog listener does not lose data across restarts or pauses, provided that it:
- Persists the binlog position (file name + offset)
- Has a long enough binlog retention window configured
- Shuts down and recovers gracefully
- Adds checksum and repair logic for business-critical data
A recommended complete setup:
- Hybrid position storage (local file + database)
- Periodic checkpoints
- Data validation on start-up
- A sufficient binlog retention period
- Monitoring of binlog consumption lag
With these in place, the service can be restarted, paused or even kept offline for an extended period without losing data, and it will resume processing from the correct position.