从零实现一个分布式ID生成器:雪花算法与号段模式

前言

你有没有想过:在分布式系统中,订单ID、用户ID、消息ID是怎么生成的?为什么它们能保证全局唯一且趋势递增?

分布式ID生成器是微服务架构的基础组件。

今天我们用C语言从零实现两种分布式ID生成器:

  1. 雪花算法(Snowflake)—— 高性能、趋势递增

  2. 号段模式(Segment)—— 批量获取、高可用


一、分布式ID的核心要求

| 要求 | 说明 |
| --- | --- |
| 全局唯一 | 所有服务生成的ID不重复 |
| 趋势递增 | ID按时间有序,利于数据库索引 |
| 高性能 | 单机每秒生成百万级ID |
| 高可用 | 不依赖单一节点 |
| 可反解 | 从ID能解析出生成时间等信息 |


二、完整代码实现

  1. 雪花算法(Snowflake)

```c

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <unistd.h>

#include <pthread.h>

#include <time.h>

#include <errno.h>

#include <stdint.h>

// 雪花算法配置

// Bit-layout and identity settings of one Snowflake generator.
typedef struct snowflake_config {
    // Width in bits of the worker-id field (5 by default).
    int worker_id_bits;
    // Width in bits of the datacenter-id field (5 by default).
    int datacenter_id_bits;
    // Width in bits of the per-millisecond sequence field (12 by default).
    int sequence_bits;

    // Worker identifier embedded in every generated ID.
    uint64_t worker_id;
    // Datacenter identifier embedded in every generated ID.
    uint64_t datacenter_id;

    // Start-of-epoch timestamp, in milliseconds.
    uint64_t epoch;
} snowflake_config_t;

// 雪花算法生成器

// Runtime state of a Snowflake generator.
typedef struct snowflake_generator {
    // Layout and identity settings this generator was created with.
    snowflake_config_t config;

    // Millisecond timestamp stored by the most recent snowflake_next_id call.
    uint64_t last_timestamp;
    // Sequence counter within the current millisecond.
    uint64_t sequence;
    // All-ones mask for the sequence field; also its largest value.
    uint64_t max_sequence;

    // Left-shift amounts used when packing each field into an ID.
    uint64_t worker_id_shift;
    uint64_t datacenter_id_shift;
    uint64_t timestamp_shift;

    // Copy of config.epoch, subtracted from the timestamp before packing.
    uint64_t twepoch;

    // Held by snowflake_next_id while it reads and updates the fields above.
    pthread_mutex_t mutex;
} snowflake_generator_t;

// 创建雪花算法生成器

// Allocate and initialise a Snowflake generator with the default layout
// (5 worker bits + 5 datacenter bits + 12 sequence bits).
//
// worker_id / datacenter_id: only the low 5 bits of each are kept.
//
// Returns a heap-allocated generator, or NULL if the allocation or the mutex
// initialisation fails. The caller owns the result and releases it with
// pthread_mutex_destroy(&gen->mutex) followed by free(gen).
snowflake_generator_t *snowflake_create(uint64_t worker_id, uint64_t datacenter_id) {
    enum { WORKER_ID_BITS = 5, DATACENTER_ID_BITS = 5, SEQUENCE_BITS = 12 };

    // calloc zeroes last_timestamp and sequence for us.
    snowflake_generator_t *gen = calloc(1, sizeof *gen);
    if (gen == NULL) {
        return NULL;
    }

    gen->config.worker_id_bits = WORKER_ID_BITS;
    gen->config.datacenter_id_bits = DATACENTER_ID_BITS;
    gen->config.sequence_bits = SEQUENCE_BITS;

    // Masks are built in 64-bit arithmetic so they match the field type.
    gen->config.worker_id = worker_id & ((UINT64_C(1) << WORKER_ID_BITS) - 1);
    gen->config.datacenter_id = datacenter_id & ((UINT64_C(1) << DATACENTER_ID_BITS) - 1);

    // 2010-11-04 01:42:54.657 UTC, in milliseconds since the Unix epoch.
    gen->config.epoch = 1288834974657ULL;

    // Field positions, from least significant: sequence | worker | datacenter | timestamp.
    gen->max_sequence = (UINT64_C(1) << SEQUENCE_BITS) - 1;
    gen->worker_id_shift = SEQUENCE_BITS;
    gen->datacenter_id_shift = SEQUENCE_BITS + WORKER_ID_BITS;
    gen->timestamp_shift = SEQUENCE_BITS + WORKER_ID_BITS + DATACENTER_ID_BITS;
    gen->twepoch = gen->config.epoch;

    if (pthread_mutex_init(&gen->mutex, NULL) != 0) {
        free(gen);
        return NULL;
    }

    // uint64_t is not guaranteed to be unsigned long, so cast to a type that
    // has a portable conversion specifier.
    printf("雪花算法生成器创建: worker=%llu, datacenter=%llu\n",
           (unsigned long long)gen->config.worker_id,
           (unsigned long long)gen->config.datacenter_id);

    return gen;
}

// 获取当前时间戳(毫秒)

// Return the current calendar time in milliseconds since the Unix epoch.
//
// Uses C11 timespec_get from <time.h>, so no additional header is required.
// Returns 0 if the clock cannot be read.
uint64_t get_timestamp_ms(void) {
    struct timespec now;

    if (timespec_get(&now, TIME_UTC) != TIME_UTC) {
        return 0;
    }

    return (uint64_t)now.tv_sec * 1000u + (uint64_t)now.tv_nsec / 1000000u;
}

// 等待直到下一个毫秒

// Poll the clock until it reads strictly later than last_timestamp and
// return that reading. Each retry re-reads the clock and then sleeps 100
// microseconds before the reading is compared again.
uint64_t wait_next_millis(uint64_t last_timestamp) {
    uint64_t now;

    for (now = get_timestamp_ms(); now <= last_timestamp; usleep(100)) {
        now = get_timestamp_ms();
    }

    return now;
}

// 生成ID

// Produce the next ID from gen.
//
// Layout (high to low): (timestamp - twepoch) | datacenter_id | worker_id |
// sequence, using the shift amounts stored in gen.
//
// Returns the new ID, or 0 when the system clock is behind the last issued
// timestamp and cannot be waited out. The whole call runs under gen->mutex,
// including any sleeping, so concurrent callers are serialised.
uint64_t snowflake_next_id(snowflake_generator_t *gen) {
    // Rollbacks shorter than this are waited out; longer ones fail the call.
    enum { MAX_TOLERATED_ROLLBACK_MS = 5 };

    pthread_mutex_lock(&gen->mutex);

    uint64_t timestamp = get_timestamp_ms();

    if (timestamp < gen->last_timestamp) {
        uint64_t behind_ms = gen->last_timestamp - timestamp;

        if (behind_ms >= MAX_TOLERATED_ROLLBACK_MS) {
            pthread_mutex_unlock(&gen->mutex);
            return 0;
        }

        usleep((unsigned int)behind_ms * 1000u);
        timestamp = get_timestamp_ms();

        // The sleep may not have been enough (or the clock stepped back
        // again). Continuing would move last_timestamp backwards and reset
        // the sequence, which can reissue IDs already handed out.
        if (timestamp < gen->last_timestamp) {
            pthread_mutex_unlock(&gen->mutex);
            return 0;
        }
    }

    if (timestamp == gen->last_timestamp) {
        // Same millisecond: take the next sequence number.
        gen->sequence = (gen->sequence + 1) & gen->max_sequence;
        if (gen->sequence == 0) {
            // Sequence space exhausted for this millisecond.
            timestamp = wait_next_millis(gen->last_timestamp);
        }
    } else {
        gen->sequence = 0;
    }

    gen->last_timestamp = timestamp;

    uint64_t id = ((timestamp - gen->twepoch) << gen->timestamp_shift) |
                  (gen->config.datacenter_id << gen->datacenter_id_shift) |
                  (gen->config.worker_id << gen->worker_id_shift) |
                  gen->sequence;

    pthread_mutex_unlock(&gen->mutex);

    return id;
}

// 解析ID

// Split an ID back into its fields using the bit widths in config.
//
// id:            ID to decode.
// config:        supplies sequence_bits, worker_id_bits, datacenter_id_bits
//                and epoch; it is only read.
// timestamp:     receives (id >> all lower fields) + config->epoch.
// datacenter_id: receives the datacenter field.
// worker_id:     receives the worker field.
// sequence:      receives the sequence field.
//
// Masks are built in 64-bit arithmetic; building them with a plain int
// `1 << bits` is undefined once a field is 31 bits or wider.
void snowflake_parse_id(uint64_t id, snowflake_config_t *config,
                        uint64_t *timestamp, uint64_t *datacenter_id,
                        uint64_t *worker_id, uint64_t *sequence) {
    const unsigned seq_bits = (unsigned)config->sequence_bits;
    const unsigned worker_bits = (unsigned)config->worker_id_bits;
    const unsigned dc_bits = (unsigned)config->datacenter_id_bits;

    const uint64_t seq_mask = (UINT64_C(1) << seq_bits) - 1;
    const uint64_t worker_mask = (UINT64_C(1) << worker_bits) - 1;
    const uint64_t dc_mask = (UINT64_C(1) << dc_bits) - 1;

    *sequence = id & seq_mask;
    *worker_id = (id >> seq_bits) & worker_mask;
    *datacenter_id = (id >> (seq_bits + worker_bits)) & dc_mask;
    *timestamp = (id >> (seq_bits + worker_bits + dc_bits)) + config->epoch;
}

```

  2. 号段模式(Segment)

```c

// 号段生成器

// State of one segment-mode ID generator.
typedef struct segment_generator {
    // Business tag naming this ID sequence, as a NUL-terminated string.
    char biz_tag[64];

    // End of the most recently allocated segment.
    uint64_t max_id;
    // Number of IDs in each segment.
    uint64_t step;
    // Last ID handed out; the next one is current_id + 1.
    uint64_t current_id;
    // First ID of the current segment.
    uint64_t segment_start;
    // Last ID of the current segment.
    uint64_t segment_end;

    // Non-zero once the first segment has been allocated.
    int initialized;

    // Guards every field above.
    pthread_mutex_t mutex;

    // Link to another generator.
    // NOTE(review): not used by any code visible here — presumably chains the
    // generators owned by a segment manager; confirm.
    struct segment_generator *next;
} segment_generator_t;

// 号段管理器

// Registry for segment generators.
typedef struct segment_manager {
    // Generators owned by this manager.
    // NOTE(review): presumably the head of a list linked through
    // segment_generator.next — not exercised by code visible here; confirm.
    segment_generator_t *generators;

    // NOTE(review): presumably guards the fields of this struct; no code
    // visible here locks it — confirm.
    pthread_mutex_t mutex;

    // Path of the persistence file, as a NUL-terminated string.
    char db_file[256];
} segment_manager_t;

// Process-wide manager instance; starts out NULL.
segment_manager_t *g_segment_manager = NULL;

// 加载或创建号段

// Create a segment generator for biz_tag that allocates `step` IDs per segment.
//
// Despite the name, nothing is loaded here: the generator starts
// uninitialised and receives its first segment on first use.
//
// biz_tag: NUL-terminated tag; truncated if it does not fit the biz_tag field.
// step:    number of IDs per segment.
//
// Returns a heap-allocated generator, or NULL if biz_tag is NULL, or if the
// allocation or the mutex initialisation fails. The caller owns the result.
segment_generator_t *segment_load_or_create(const char *biz_tag, uint64_t step) {
    if (biz_tag == NULL) {
        return NULL;
    }

    // calloc leaves current_id == 0 and initialized == 0.
    segment_generator_t *gen = calloc(1, sizeof *gen);
    if (gen == NULL) {
        return NULL;
    }

    // Bounded copy: always NUL-terminated and never writes past the field.
    snprintf(gen->biz_tag, sizeof gen->biz_tag, "%s", biz_tag);
    gen->step = step;

    if (pthread_mutex_init(&gen->mutex, NULL) != 0) {
        free(gen);
        return NULL;
    }

    return gen;
}

// 获取下一段号段

// Move gen on to a fresh segment. The caller must already hold gen->mutex.
static void segment_refill_locked(segment_generator_t *gen) {
    if (!gen->initialized) {
        // Stand-in for loading the high-water mark from persistent storage.
        gen->max_id = 0;
        gen->initialized = 1;
    }

    // A step of 0 would produce an empty segment and the same ID would be
    // returned on every call, so treat it as 1.
    uint64_t step = gen->step != 0 ? gen->step : 1;

    gen->segment_start = gen->max_id + 1;
    gen->segment_end = gen->max_id + step;
    gen->max_id = gen->segment_end;
    // current_id is pre-incremented by segment_next_id, so park it one below
    // the first ID of the segment.
    gen->current_id = gen->segment_start - 1;

    // uint64_t is cast because it is not guaranteed to match %llu.
    printf("Segment 分配号段: %s %llu, %llu\n",
           gen->biz_tag,
           (unsigned long long)gen->segment_start,
           (unsigned long long)gen->segment_end);
}

// Unconditionally allocate the next segment for gen, discarding whatever is
// left of the current one. Takes gen->mutex itself.
void segment_fetch_next(segment_generator_t *gen) {
    pthread_mutex_lock(&gen->mutex);
    segment_refill_locked(gen);
    pthread_mutex_unlock(&gen->mutex);
}

// Return the next ID from gen, allocating a new segment first when the
// generator is uninitialised or the current segment is used up.
//
// The exhaustion check and the refill happen under a single hold of
// gen->mutex. Releasing the lock between them would let two threads both see
// an exhausted segment and both allocate a new one, skipping a whole segment.
uint64_t segment_next_id(segment_generator_t *gen) {
    pthread_mutex_lock(&gen->mutex);

    if (!gen->initialized || gen->current_id >= gen->segment_end) {
        segment_refill_locked(gen);
    }

    uint64_t id = ++gen->current_id;

    pthread_mutex_unlock(&gen->mutex);

    return id;
}

// 批量获取ID

// Fetch `count` consecutive calls' worth of IDs from gen.
//
// Returns a heap-allocated array of `count` IDs that the caller must free,
// or NULL when count is not positive or the allocation fails.
uint64_t *segment_next_ids(segment_generator_t *gen, int count) {
    if (count <= 0) {
        return NULL;
    }

    uint64_t *ids = malloc(sizeof *ids * (size_t)count);
    if (ids == NULL) {
        return NULL;
    }

    for (int i = 0; i < count; i++) {
        ids[i] = segment_next_id(gen);
    }

    return ids;
}

```

  3. 混合ID生成器

```c

// 混合生成器(整合雪花+号段)

// Pairs a Snowflake generator with a segment generator behind one handle.
typedef struct hybrid_generator {
    // Backend used while mode == 0.
    snowflake_generator_t *snowflake;
    // Backend used for any other mode value.
    segment_generator_t *segment;
    // Backend selector: 0 = Snowflake, 1 = segment.
    int mode;
} hybrid_generator_t;

// Build a hybrid generator holding one Snowflake and one segment generator.
// The new generator starts in mode 0 (Snowflake).
//
// worker_id, datacenter_id: forwarded to snowflake_create.
// biz_tag, step:            forwarded to segment_load_or_create.
//
// Returns a heap-allocated generator owned by the caller, or NULL if the
// allocation fails or either sub-generator comes back NULL. On failure
// nothing is leaked.
hybrid_generator_t *hybrid_create(uint64_t worker_id, uint64_t datacenter_id,
                                  const char *biz_tag, uint64_t step) {
    hybrid_generator_t *hybrid = malloc(sizeof *hybrid);
    if (hybrid == NULL) {
        return NULL;
    }

    hybrid->snowflake = snowflake_create(worker_id, datacenter_id);
    hybrid->segment = segment_load_or_create(biz_tag, step);
    hybrid->mode = 0;

    if (hybrid->snowflake == NULL || hybrid->segment == NULL) {
        // Tear down whichever half was built before reporting failure.
        if (hybrid->snowflake != NULL) {
            pthread_mutex_destroy(&hybrid->snowflake->mutex);
            free(hybrid->snowflake);
        }
        if (hybrid->segment != NULL) {
            pthread_mutex_destroy(&hybrid->segment->mutex);
            free(hybrid->segment);
        }
        free(hybrid);
        return NULL;
    }

    return hybrid;
}

// Produce the next ID from whichever backend hybrid->mode selects:
// the Snowflake generator when mode is 0, the segment generator otherwise.
uint64_t hybrid_next_id(hybrid_generator_t *hybrid) {
    return hybrid->mode == 0 ? snowflake_next_id(hybrid->snowflake)
                             : segment_next_id(hybrid->segment);
}

```

  4. 性能测试

```c

void test_snowflake_performance(void) {

printf("\n=== 雪花算法性能测试 ===\n");

snowflake_generator_t *gen = snowflake_create(1, 1);

int count = 1000000;

uint64_t *ids = malloc(sizeof(uint64_t) * count);

clock_t start = clock();

for (int i = 0; i < count; i++) {

idsi = snowflake_next_id(gen);

}

clock_t end = clock();

double elapsed = (double)(end - start) / CLOCKS_PER_SEC;

printf("生成 %d 个ID, 耗时 %.3f 秒\n", count, elapsed);

printf("QPS: %.0f\n", count / elapsed);

// 验证唯一性

int unique = 1;

for (int i = 1; i < count; i++) {

if (idsi == idsi-1) {

unique = 0;

break;

}

}

printf("唯一性: %s\n", unique ? "✅ 通过" : "❌ 失败");

free(ids);

free(gen);

}

void test_snowflake_parse(void) {

printf("\n=== 雪花算法解析测试 ===\n");

snowflake_generator_t *gen = snowflake_create(3, 5);

uint64_t id = snowflake_next_id(gen);

uint64_t timestamp, datacenter_id, worker_id, sequence;

snowflake_parse_id(id, &gen->config, &timestamp, &datacenter_id, &worker_id, &sequence);

printf("ID: %llu\n", id);

printf("时间戳: %llu (%s)", timestamp, ctime((time_t*)&timestamp));

printf("数据中心: %llu\n", datacenter_id);

printf("机器ID: %llu\n", worker_id);

printf("序列号: %llu\n", sequence);

free(gen);

}

void test_segment_performance(void) {

printf("\n=== 号段模式性能测试 ===\n");

segment_generator_t *gen = segment_load_or_create("order", 1000);

int count = 1000000;

uint64_t *ids = malloc(sizeof(uint64_t) * count);

clock_t start = clock();

for (int i = 0; i < count; i++) {

idsi = segment_next_id(gen);

}

clock_t end = clock();

double elapsed = (double)(end - start) / CLOCKS_PER_SEC;

printf("生成 %d 个ID, 耗时 %.3f 秒\n", count, elapsed);

printf("QPS: %.0f\n", count / elapsed);

// 验证连续递增

int sorted = 1;

for (int i = 1; i < count; i++) {

if (idsi <= idsi-1) {

sorted = 0;

break;

}

}

printf("连续递增: %s\n", sorted ? "✅ 通过" : "❌ 失败");

free(ids);

free(gen);

}

// Entry point: print the banner, then run the parse test followed by the two
// performance tests.
int main() {
    fputs("=== 分布式ID生成器测试 ===\n", stdout);

    test_snowflake_parse();
    test_snowflake_performance();
    test_segment_performance();

    return EXIT_SUCCESS;
}

```


三、编译和运行

```bash

gcc -o id_generator id_generator.c -lpthread

./id_generator

```


四、两种算法对比

| 特性 | 雪花算法 | 号段模式 |
| --- | --- | --- |
| ID类型 | 64位整数 | 64位整数 |
| 性能 | 极高(百万级/秒) | 高(受号段大小影响) |
| 依赖 | 时钟同步 | 持久化存储 |
| 可用性 | 时钟回拨风险 | 号段耗尽需DB |
| 有序性 | 趋势递增 | 连续递增 |
| 可反解 | ✅ 可解析时间 | ❌ 不可反解 |
| 适用场景 | 高并发、需解析 | 数据库主键、批量 |


五、工业级扩展

  1. 时钟回拨解决

```c

// 方案1:等待时钟追上

// 方案2:使用机器ID的备用位标记回拨

// 方案3:上报监控,使用备用节点

```

  2. 号段持久化

```c

// 保存号段状态到文件或数据库

// Write gen->max_id as one decimal line to ./segments/<biz_tag>.dat,
// replacing any previous contents.
//
// Failures (path too long, file cannot be opened or written) are reported on
// stderr and the function returns without saving. The ./segments directory
// is not created here.
void segment_save(segment_generator_t *gen) {
    char filepath[256];

    int len = snprintf(filepath, sizeof filepath, "./segments/%s.dat", gen->biz_tag);
    if (len < 0 || (size_t)len >= sizeof filepath) {
        fprintf(stderr, "segment_save: path too long for tag %s\n", gen->biz_tag);
        return;
    }

    FILE *fp = fopen(filepath, "w");
    if (fp == NULL) {
        perror(filepath);
        return;
    }

    // uint64_t is cast because it is not guaranteed to match %llu.
    if (fprintf(fp, "%llu\n", (unsigned long long)gen->max_id) < 0) {
        perror(filepath);
    }

    // fclose flushes buffered output, so a failure here means data was lost.
    if (fclose(fp) != 0) {
        perror(filepath);
    }
}

// Read gen->max_id back from ./segments/<biz_tag>.dat.
//
// gen->max_id is left unchanged when the path does not fit, the file cannot
// be opened, or it does not start with a decimal number.
void segment_load(segment_generator_t *gen) {
    char filepath[256];

    int len = snprintf(filepath, sizeof filepath, "./segments/%s.dat", gen->biz_tag);
    if (len < 0 || (size_t)len >= sizeof filepath) {
        return;
    }

    FILE *fp = fopen(filepath, "r");
    if (fp == NULL) {
        return;
    }

    // Scan into the exact type %llu expects, then convert.
    unsigned long long saved = 0;
    if (fscanf(fp, "%llu", &saved) == 1) {
        gen->max_id = (uint64_t)saved;
    }

    fclose(fp);
}

```


六、总结

通过这篇文章,你学会了:

· 分布式ID的核心要求(唯一、递增、高性能、高可用)

· 雪花算法(结构、位分配、时钟回拨处理)

· 号段模式(批量获取、续期机制)

· ID解析和性能测试

· 两种算法的适用场景

分布式ID生成器是微服务架构的基础组件。掌握它,你就理解了订单号、流水号等业务ID的设计原理。

下一篇预告:《从零实现一个分布式调度器:任务分片与容错》


评论区分享一下你用过什么分布式ID方案~