前言
你有没有想过:在分布式系统中,订单ID、用户ID、消息ID是怎么生成的?为什么它们能保证全局唯一且趋势递增?
分布式ID生成器是微服务架构的基础组件。
今天我们用C语言从零实现两种分布式ID生成器:
- 雪花算法(Snowflake)—— 高性能、趋势递增
- 号段模式(Segment)—— 批量获取、高可用
一、分布式ID的核心要求
| 要求 | 说明 |
| --- | --- |
| 全局唯一 | 所有服务生成的ID不重复 |
| 趋势递增 | ID按时间有序,利于数据库索引 |
| 高性能 | 单机每秒生成百万级ID |
| 高可用 | 不依赖单一节点 |
| 可反解 | 从ID能解析出生成时间等信息 |
二、完整代码实现
- 雪花算法(Snowflake)
```c
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/time.h>
// Snowflake configuration: the bit width of each ID field, the node
// identity that is packed into every ID, and the custom epoch that is
// subtracted from the wall-clock time when an ID is assembled.
typedef struct snowflake_config {
int worker_id_bits; // worker ID bit width (default 5)
int datacenter_id_bits; // datacenter ID bit width (default 5)
int sequence_bits; // sequence number bit width (default 12)
uint64_t worker_id; // worker ID packed into each generated ID
uint64_t datacenter_id; // datacenter ID packed into each generated ID
uint64_t epoch; // starting timestamp (milliseconds)
} snowflake_config_t;
// Snowflake generator state: the configuration plus values derived from it
// (shifts, sequence mask) and the mutable state updated by
// snowflake_next_id() while holding `mutex`.
typedef struct snowflake_generator {
snowflake_config_t config;
uint64_t last_timestamp; // millisecond timestamp used for the most recent ID
uint64_t sequence; // per-millisecond counter, wrapped with max_sequence
uint64_t max_sequence; // (1 << sequence_bits) - 1, used as the sequence mask
uint64_t worker_id_shift; // left shift applied to the worker ID
uint64_t datacenter_id_shift; // left shift applied to the datacenter ID
uint64_t timestamp_shift; // left shift applied to (timestamp - twepoch)
uint64_t twepoch; // copy of config.epoch set at creation
pthread_mutex_t mutex; // locked for the duration of snowflake_next_id()
} snowflake_generator_t;
// Create a snowflake generator using the default layout:
// 5 worker bits + 5 datacenter bits + 12 sequence bits.
//
// worker_id / datacenter_id: node identity; each is truncated to the low
// bits that fit its configured field width.
//
// Returns a heap-allocated generator owned by the caller (destroy its mutex
// and free() it when done), or NULL if allocation or mutex initialisation
// fails.
snowflake_generator_t *snowflake_create(uint64_t worker_id, uint64_t datacenter_id) {
    snowflake_generator_t *gen = calloc(1, sizeof *gen);
    if (gen == NULL) {
        return NULL;
    }

    gen->config.worker_id_bits = 5;
    gen->config.datacenter_id_bits = 5;
    gen->config.sequence_bits = 12;

    // Derive the masks from the configured widths so the two cannot drift
    // apart; the shifts are done in 64-bit arithmetic.
    uint64_t worker_mask = (UINT64_C(1) << gen->config.worker_id_bits) - 1;
    uint64_t datacenter_mask = (UINT64_C(1) << gen->config.datacenter_id_bits) - 1;
    gen->config.worker_id = worker_id & worker_mask;
    gen->config.datacenter_id = datacenter_id & datacenter_mask;
    gen->config.epoch = 1288834974657ULL; // 2010-11-04 01:42:54.657 UTC

    // Field layout, low to high: sequence | worker | datacenter | timestamp.
    gen->max_sequence = (UINT64_C(1) << gen->config.sequence_bits) - 1;
    gen->worker_id_shift = (uint64_t)gen->config.sequence_bits;
    gen->datacenter_id_shift =
        (uint64_t)(gen->config.sequence_bits + gen->config.worker_id_bits);
    gen->timestamp_shift =
        (uint64_t)(gen->config.sequence_bits + gen->config.worker_id_bits +
                   gen->config.datacenter_id_bits);
    gen->twepoch = gen->config.epoch;
    gen->last_timestamp = 0;
    gen->sequence = 0;

    if (pthread_mutex_init(&gen->mutex, NULL) != 0) {
        free(gen);
        return NULL;
    }

    // uint64_t is not guaranteed to be unsigned long; cast for a portable
    // conversion specifier.
    printf("雪花算法生成器创建: worker=%llu, datacenter=%llu\n",
           (unsigned long long)gen->config.worker_id,
           (unsigned long long)gen->config.datacenter_id);
    return gen;
}
// Return the current wall-clock time from gettimeofday(), converted to
// whole milliseconds.
uint64_t get_timestamp_ms(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    uint64_t from_seconds = (uint64_t)now.tv_sec * 1000;
    return from_seconds + now.tv_usec / 1000;
}
// Block until the clock reports a millisecond strictly greater than
// last_timestamp, and return that reading.
//
// The sleep happens BEFORE each re-read, so the function returns as soon as
// a fresh timestamp is observed and the value returned is the most recent
// clock reading rather than one taken before a final, unnecessary sleep.
uint64_t wait_next_millis(uint64_t last_timestamp) {
    uint64_t ts = get_timestamp_ms();
    while (ts <= last_timestamp) {
        usleep(100);
        ts = get_timestamp_ms();
    }
    return ts;
}
// Generate the next ID.
//
// Layout (high to low): (timestamp - twepoch) | datacenter | worker | sequence.
// The whole operation runs with gen->mutex held.
//
// Returns the new ID, or 0 when no ID can be issued safely:
//   - the clock is behind last_timestamp by 5 ms or more,
//   - the clock is still behind after waiting out a smaller rollback,
//   - the clock reads earlier than the configured epoch.
uint64_t snowflake_next_id(snowflake_generator_t *gen) {
    pthread_mutex_lock(&gen->mutex);
    uint64_t timestamp = get_timestamp_ms();

    // Clock rollback handling.
    if (timestamp < gen->last_timestamp) {
        uint64_t behind_ms = gen->last_timestamp - timestamp;
        if (behind_ms >= 5) {
            // Rollback too large to wait for: report failure.
            pthread_mutex_unlock(&gen->mutex);
            return 0;
        }
        // Small rollback: sleep it off and read the clock again.
        usleep((unsigned int)(behind_ms * 1000));
        timestamp = get_timestamp_ms();
        if (timestamp < gen->last_timestamp) {
            // Still behind. Continuing would move last_timestamp backwards
            // and allow previously issued IDs to be produced again.
            pthread_mutex_unlock(&gen->mutex);
            return 0;
        }
    }

    // A clock earlier than the epoch would underflow the subtraction below.
    if (timestamp < gen->twepoch) {
        pthread_mutex_unlock(&gen->mutex);
        return 0;
    }

    uint64_t sequence;
    if (timestamp == gen->last_timestamp) {
        // Same millisecond: bump the sequence, wrapping with the mask.
        sequence = (gen->sequence + 1) & gen->max_sequence;
        if (sequence == 0) {
            // Sequence exhausted for this millisecond: wait for the next one.
            timestamp = wait_next_millis(gen->last_timestamp);
        }
    } else {
        sequence = 0;
    }
    gen->sequence = sequence;
    gen->last_timestamp = timestamp;

    // Assemble the ID.
    uint64_t id = ((timestamp - gen->twepoch) << gen->timestamp_shift) |
                  (gen->config.datacenter_id << gen->datacenter_id_shift) |
                  (gen->config.worker_id << gen->worker_id_shift) |
                  sequence;

    pthread_mutex_unlock(&gen->mutex);
    return id;
}
// Split an ID back into its fields using the bit widths in `config`.
//
// id:            ID to decode.
// config:        layout (bit widths) and epoch the ID was generated with.
// timestamp:     out; millisecond timestamp (stored offset + config->epoch).
// datacenter_id: out; datacenter field.
// worker_id:     out; worker field.
// sequence:      out; sequence field.
//
// Masks are built with 64-bit shifts so a field width of 31 bits or more
// does not overflow an int.
void snowflake_parse_id(uint64_t id, snowflake_config_t *config,
                        uint64_t *timestamp, uint64_t *datacenter_id,
                        uint64_t *worker_id, uint64_t *sequence) {
    const int worker_shift = config->sequence_bits;
    const int datacenter_shift = worker_shift + config->worker_id_bits;
    const int timestamp_shift = datacenter_shift + config->datacenter_id_bits;

    const uint64_t sequence_mask = (UINT64_C(1) << config->sequence_bits) - 1;
    const uint64_t worker_mask = (UINT64_C(1) << config->worker_id_bits) - 1;
    const uint64_t datacenter_mask = (UINT64_C(1) << config->datacenter_id_bits) - 1;

    *timestamp = (id >> timestamp_shift) + config->epoch;
    *datacenter_id = (id >> datacenter_shift) & datacenter_mask;
    *worker_id = (id >> worker_shift) & worker_mask;
    *sequence = id & sequence_mask;
}
```
- 号段模式(Segment)
```c
// Segment generator: hands out IDs from a pre-allocated [segment_start,
// segment_end] range and carves a new range of `step` IDs when it runs out.
typedef struct segment_generator {
    char biz_tag[64];               // business tag (NUL-terminated)
    uint64_t max_id;                // highest ID covered by any allocated segment
    uint64_t step;                  // number of IDs per segment
    uint64_t current_id;            // last ID handed out
    uint64_t segment_start;         // first ID of the current segment
    uint64_t segment_end;           // last ID of the current segment
    int initialized;                // non-zero once max_id has been loaded
    pthread_mutex_t mutex;          // guards all mutable fields above
    struct segment_generator *next; // link to another generator
} segment_generator_t;
// Segment manager: head of a list of segment generators plus the path of
// the file used for persistence.
typedef struct segment_manager {
    segment_generator_t *generators; // head of the generator list
    pthread_mutex_t mutex;
    char db_file[256];               // persistence file path
} segment_manager_t;
// Global segment manager pointer, initially NULL.
// NOTE(review): nothing in the visible code assigns or reads it — confirm
// whether it is still needed.
segment_manager_t *g_segment_manager = NULL;
// Allocate a segment generator for `biz_tag` that hands out `step` IDs per
// segment. The generator starts uninitialised; the first segment is carved
// on first use.
//
// biz_tag: business tag; must be non-NULL and fit in the generator's
//          biz_tag buffer including the terminating NUL.
// step:    IDs per segment; must be non-zero (a zero step yields an empty
//          segment on every refill, so every call would return the same ID).
//
// Returns a heap-allocated generator owned by the caller, or NULL on
// invalid arguments, allocation failure, or mutex initialisation failure.
segment_generator_t *segment_load_or_create(const char *biz_tag, uint64_t step) {
    if (biz_tag == NULL || step == 0) {
        return NULL;
    }

    segment_generator_t *gen = calloc(1, sizeof *gen);
    if (gen == NULL) {
        return NULL;
    }

    // Bounded copy; reject tags that would be truncated instead of silently
    // aliasing two different tags.
    int written = snprintf(gen->biz_tag, sizeof gen->biz_tag, "%s", biz_tag);
    if (written < 0 || (size_t)written >= sizeof gen->biz_tag) {
        free(gen);
        return NULL;
    }

    gen->step = step;
    gen->current_id = 0;
    gen->initialized = 0;

    if (pthread_mutex_init(&gen->mutex, NULL) != 0) {
        free(gen);
        return NULL;
    }
    return gen;
}
// Carve the next segment [max_id + 1, max_id + step] and reset current_id
// so that the next ID handed out is segment_start. Takes gen->mutex for the
// duration of the call, so the caller must NOT already hold it.
void segment_fetch_next(segment_generator_t *gen) {
    pthread_mutex_lock(&gen->mutex);

    if (!gen->initialized) {
        // Load from persistent storage (simulated: start from zero).
        gen->max_id = 0;
        gen->initialized = 1;
    }

    // Allocate the new segment.
    gen->segment_start = gen->max_id + 1;
    gen->segment_end = gen->max_id + gen->step;
    gen->max_id = gen->segment_end;
    gen->current_id = gen->segment_start - 1;

    // %llu requires unsigned long long; uint64_t may be a different type.
    printf("Segment 分配号段: %s %llu, %llu\n",
           gen->biz_tag,
           (unsigned long long)gen->segment_start,
           (unsigned long long)gen->segment_end);

    pthread_mutex_unlock(&gen->mutex);
}
// Carve the next segment [max_id + 1, max_id + step] for `gen`.
// The caller must already hold gen->mutex.
static void segment_refill_locked(segment_generator_t *gen) {
    if (!gen->initialized) {
        // Load from persistent storage (simulated: start from zero).
        gen->max_id = 0;
        gen->initialized = 1;
    }
    gen->segment_start = gen->max_id + 1;
    gen->segment_end = gen->max_id + gen->step;
    gen->max_id = gen->segment_end;
    gen->current_id = gen->segment_start - 1;
    printf("Segment 分配号段: %s %llu, %llu\n",
           gen->biz_tag,
           (unsigned long long)gen->segment_start,
           (unsigned long long)gen->segment_end);
}

// Return the next ID for `gen`, carving a new segment first if the
// generator is uninitialised or the current segment is used up.
//
// The exhaustion check, the refill and the increment all happen under one
// continuous hold of gen->mutex. Dropping the lock between the check and
// the refill would let two threads both refill (skipping a whole segment)
// or let other threads drain the fresh segment before this one re-locks.
uint64_t segment_next_id(segment_generator_t *gen) {
    pthread_mutex_lock(&gen->mutex);

    if (!gen->initialized || gen->current_id >= gen->segment_end) {
        segment_refill_locked(gen);
    }

    uint64_t id = ++gen->current_id;

    pthread_mutex_unlock(&gen->mutex);
    return id;
}
// Fetch `count` IDs in one call.
//
// Returns a malloc'd array of `count` IDs that the caller must free(), or
// NULL when count <= 0 or the allocation fails.
uint64_t *segment_next_ids(segment_generator_t *gen, int count) {
    if (count <= 0) {
        return NULL;
    }

    // calloc checks count * sizeof for overflow.
    uint64_t *ids = calloc((size_t)count, sizeof *ids);
    if (ids == NULL) {
        return NULL;
    }

    for (int i = 0; i < count; i++) {
        ids[i] = segment_next_id(gen);
    }
    return ids;
}
```
- 混合ID生成器
```c
// Hybrid generator: bundles a snowflake generator and a segment generator
// and picks one of them per call based on `mode`.
typedef struct hybrid_generator {
snowflake_generator_t *snowflake;
segment_generator_t *segment;
int mode; // 0 = snowflake; any other value selects the segment generator
} hybrid_generator_t;
// Create a hybrid generator holding one snowflake generator and one segment
// generator. The mode starts at 0 (snowflake).
//
// worker_id, datacenter_id: forwarded to snowflake_create().
// biz_tag, step:            forwarded to segment_load_or_create().
//
// Returns a heap-allocated generator owned by the caller, or NULL if any
// allocation fails; on failure nothing is leaked.
hybrid_generator_t *hybrid_create(uint64_t worker_id, uint64_t datacenter_id,
                                  const char *biz_tag, uint64_t step) {
    hybrid_generator_t *hybrid = malloc(sizeof *hybrid);
    if (hybrid == NULL) {
        return NULL;
    }

    hybrid->snowflake = snowflake_create(worker_id, datacenter_id);
    hybrid->segment = segment_load_or_create(biz_tag, step);

    if (hybrid->snowflake == NULL || hybrid->segment == NULL) {
        // Release whichever half was created before giving up.
        if (hybrid->snowflake != NULL) {
            pthread_mutex_destroy(&hybrid->snowflake->mutex);
            free(hybrid->snowflake);
        }
        if (hybrid->segment != NULL) {
            pthread_mutex_destroy(&hybrid->segment->mutex);
            free(hybrid->segment);
        }
        free(hybrid);
        return NULL;
    }

    hybrid->mode = 0;
    return hybrid;
}
// Produce the next ID from whichever backend `mode` selects:
// mode 0 uses the snowflake generator, anything else the segment generator.
uint64_t hybrid_next_id(hybrid_generator_t *hybrid) {
    return (hybrid->mode == 0)
               ? snowflake_next_id(hybrid->snowflake)
               : segment_next_id(hybrid->segment);
}
```
- 性能测试
```c
void test_snowflake_performance(void) {
printf("\n=== 雪花算法性能测试 ===\n");
snowflake_generator_t *gen = snowflake_create(1, 1);
int count = 1000000;
uint64_t *ids = malloc(sizeof(uint64_t) * count);
clock_t start = clock();
for (int i = 0; i < count; i++) {
idsi = snowflake_next_id(gen);
}
clock_t end = clock();
double elapsed = (double)(end - start) / CLOCKS_PER_SEC;
printf("生成 %d 个ID, 耗时 %.3f 秒\n", count, elapsed);
printf("QPS: %.0f\n", count / elapsed);
// 验证唯一性
int unique = 1;
for (int i = 1; i < count; i++) {
if (idsi == idsi-1) {
unique = 0;
break;
}
}
printf("唯一性: %s\n", unique ? "✅ 通过" : "❌ 失败");
free(ids);
free(gen);
}
void test_snowflake_parse(void) {
printf("\n=== 雪花算法解析测试 ===\n");
snowflake_generator_t *gen = snowflake_create(3, 5);
uint64_t id = snowflake_next_id(gen);
uint64_t timestamp, datacenter_id, worker_id, sequence;
snowflake_parse_id(id, &gen->config, ×tamp, &datacenter_id, &worker_id, &sequence);
printf("ID: %llu\n", id);
printf("时间戳: %llu (%s)", timestamp, ctime((time_t*)×tamp));
printf("数据中心: %llu\n", datacenter_id);
printf("机器ID: %llu\n", worker_id);
printf("序列号: %llu\n", sequence);
free(gen);
}
void test_segment_performance(void) {
printf("\n=== 号段模式性能测试 ===\n");
segment_generator_t *gen = segment_load_or_create("order", 1000);
int count = 1000000;
uint64_t *ids = malloc(sizeof(uint64_t) * count);
clock_t start = clock();
for (int i = 0; i < count; i++) {
idsi = segment_next_id(gen);
}
clock_t end = clock();
double elapsed = (double)(end - start) / CLOCKS_PER_SEC;
printf("生成 %d 个ID, 耗时 %.3f 秒\n", count, elapsed);
printf("QPS: %.0f\n", count / elapsed);
// 验证连续递增
int sorted = 1;
for (int i = 1; i < count; i++) {
if (idsi <= idsi-1) {
sorted = 0;
break;
}
}
printf("连续递增: %s\n", sorted ? "✅ 通过" : "❌ 失败");
free(ids);
free(gen);
}
int main() {
printf("=== 分布式ID生成器测试 ===\n");
test_snowflake_parse();
test_snowflake_performance();
test_segment_performance();
return 0;
}
```
三、编译和运行
```bash
gcc -o id_generator id_generator.c -lpthread
./id_generator
```
四、两种算法对比
| 特性 | 雪花算法 | 号段模式 |
| --- | --- | --- |
| ID类型 | 64位整数 | 64位整数 |
| 性能 | 极高(百万级/秒) | 高(受号段大小影响) |
| 依赖 | 时钟同步 | 持久化存储 |
| 可用性 | 时钟回拨风险 | 号段耗尽需DB |
| 有序性 | 趋势递增 | 连续递增 |
| 可反解 | ✅ 可解析时间 | ❌ 不可反解 |
| 适用场景 | 高并发、需解析 | 数据库主键、批量 |
五、工业级扩展
- 时钟回拨解决
```c
// 方案1:等待时钟追上
// 方案2:使用机器ID的备用位标记回拨
// 方案3:上报监控,使用备用节点
```
- 号段持久化
```c
// Persist gen->max_id to "./segments/<biz_tag>.dat" as one decimal line.
//
// Failures (path too long, file cannot be opened or written) are reported
// on stderr and the function returns without writing; it never
// dereferences a NULL stream.
void segment_save(segment_generator_t *gen) {
    char filepath[256];
    int n = snprintf(filepath, sizeof(filepath), "./segments/%s.dat", gen->biz_tag);
    if (n < 0 || (size_t)n >= sizeof(filepath)) {
        fprintf(stderr, "segment_save: path too long\n");
        return;
    }

    FILE *fp = fopen(filepath, "w");
    if (fp == NULL) {
        perror(filepath);
        return;
    }

    // %llu requires unsigned long long; uint64_t may be a different type.
    if (fprintf(fp, "%llu\n", (unsigned long long)gen->max_id) < 0) {
        perror(filepath);
    }
    // fclose flushes buffered data, so its result matters for a write stream.
    if (fclose(fp) != 0) {
        perror(filepath);
    }
}
// Load gen->max_id from "./segments/<biz_tag>.dat".
//
// gen->max_id is updated only when the file exists and starts with a
// parseable unsigned decimal number; otherwise it is left untouched.
void segment_load(segment_generator_t *gen) {
    char filepath[256];
    int n = snprintf(filepath, sizeof(filepath), "./segments/%s.dat", gen->biz_tag);
    if (n < 0 || (size_t)n >= sizeof(filepath)) {
        return;
    }

    FILE *fp = fopen(filepath, "r");
    if (fp == NULL) {
        return;
    }

    // Scan into a real unsigned long long: %llu must not be pointed at a
    // uint64_t, which may be a different type.
    unsigned long long stored = 0;
    if (fscanf(fp, "%llu", &stored) == 1) {
        gen->max_id = (uint64_t)stored;
    }
    fclose(fp);
}
```
六、总结
通过这篇文章,你学会了:
· 分布式ID的核心要求(唯一、递增、高性能、高可用)
· 雪花算法(结构、位分配、时钟回拨处理)
· 号段模式(批量获取、续期机制)
· ID解析和性能测试
· 两种算法的适用场景
分布式ID生成器是微服务架构的基础组件。掌握它,你就理解了订单号、流水号等业务ID的设计原理。
下一篇预告:《从零实现一个分布式调度器:任务分片与容错》
评论区分享一下你用过什么分布式ID方案~