前言
你有没有想过:Google是怎么存储EB级别的数据的?GFS(Google File System)是Google分布式存储的基石,支撑了搜索、YouTube、Gmail等所有服务。
今天我们用C语言从零实现GFS的核心设计:
· Master(元数据管理)
· ChunkServer(数据存储)
· 文件分块(Chunk)
· 副本复制(Replication)
· 心跳与健康检查
· 读取与写入流程
一、GFS核心原理
- 架构图
```
┌─────────────────────────────────────────────────────────────┐
│ Client │
└─────────────────────────────────────────────────────────────┘
│ │
▼ ▼
┌─────────────────────────────────────────────────────────────┐
│ Master │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ 文件元数据 │ │ Chunk位置 │ │ 操作日志 │ │
│ │ 命名空间 │ │ (缓存) │ │ (WAL) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────┘
│ │
▼ ▼
┌─────────────────────────────────────────────────────────────┐
│ ChunkServer 集群 │
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
│ │Chunk A │ │Chunk B │ │Chunk C │ │Chunk A │ │
│ │(主副本) │ │(副本1) │ │(副本2) │ │(主副本) │ │
│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
└─────────────────────────────────────────────────────────────┘
```
- 核心概念
| 概念 | 说明 |
| --- | --- |
| Master | 单点,管理元数据和Chunk位置 |
| Chunk | 固定大小(64MB)的数据块 |
| ChunkServer | 存储Chunk的节点 |
| 副本 | 每个Chunk默认3个副本 |
| 租约 | 主副本的写入权限 |
- 读写流程
```
写入流程:
Client → Master(获取主副本位置)
→ 主副本(写入数据)
→ 二级副本(同步数据)
→ Client(确认)
读取流程:
Client → Master(获取Chunk位置)
→ ChunkServer(读取数据)
→ Client(返回数据)
```
二、完整代码实现
- 基础数据结构
```c
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <arpa/inet.h>
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/stat.h>
#include <unistd.h>
/* Buffer sizes (in bytes, including the terminating NUL) for names and ids. */
#define MAX_FILENAME 256
#define MAX_CHUNK_ID 64
#define MAX_CHUNK_SERVERS 10
/*
 * Size of one chunk in bytes (64MB). The product is parenthesized so that it
 * stays a single operand wherever the macro is expanded, e.g. `x / CHUNK_SIZE`.
 */
#define CHUNK_SIZE (64 * 1024 * 1024)
/* Number of replicas (one primary plus secondaries) requested per chunk. */
#define REPLICATION_FACTOR 3
/* Seconds a ChunkServer sleeps between two heartbeats. */
#define HEARTBEAT_INTERVAL 5
// Chunk句柄
typedef struct chunk_handle {
char chunk_idMAX_CHUNK_ID;
char primary_server32;
char secondary_serversREPLICATION_FACTOR32;
int version;
int ref_count;
} chunk_handle_t;
// 文件元数据
typedef struct file_metadata {
char filenameMAX_FILENAME;
chunk_handle_t *chunks;
int chunk_count;
time_t create_time;
time_t modify_time;
struct file_metadata *next;
} file_metadata_t;
/*
 * The master's record of one registered ChunkServer. Records form a singly
 * linked list headed by gfs_master_t.servers.
 */
typedef struct chunk_server {
    char server_id[32];         /* NUL-terminated unique id, used as the lookup key */
    char host[32];              /* NUL-terminated host name or address */
    int port;
    int disk_free;              /* free space in bytes as tracked by the master */
    int status;                 /* 0: offline, 1: online, 2: busy */
    time_t last_heartbeat;      /* time of the last registration or heartbeat */
    struct chunk_server *next;  /* next record in the master's list, or NULL */
} chunk_server_t;
/*
 * Master state: the file namespace, the registered ChunkServers and the
 * mutex that guards both lists.
 */
typedef struct gfs_master {
    file_metadata_t *files;      /* head of the file metadata list */
    chunk_server_t *servers;     /* head of the registered ChunkServer list */
    char chunk_dir[256];         /* NUL-terminated chunk directory path */
    pthread_mutex_t mutex;       /* protects `files` and `servers` */
    int running;                 /* set to 1 by master_create */
    pthread_t heartbeat_thread;  /* NOTE(review): never started in the visible code */
} gfs_master_t;
/*
 * Local state of one ChunkServer node: where it stores chunk files and the
 * mutex that serializes its chunk I/O.
 */
typedef struct gfs_chunkserver {
    char server_id[32];          /* NUL-terminated unique id of this node */
    char chunk_dir[256];         /* NUL-terminated directory holding "<chunk_id>.dat" files */
    int port;
    pthread_mutex_t mutex;       /* held while a chunk file is read or written */
    int running;                 /* loop flag polled by chunkserver_heartbeat_thread */
    pthread_t heartbeat_thread;  /* NOTE(review): never started in the visible code */
} gfs_chunkserver_t;
```
- Master实现
```c
/*
 * Create a master.
 *
 * chunk_dir: directory path recorded in the master; must fit in
 *            gfs_master_t.chunk_dir including the terminating NUL.
 *
 * Returns a heap-allocated master (release with free() after destroying its
 * mutex), or NULL if chunk_dir is NULL or too long, memory is exhausted, or
 * the mutex cannot be initialized.
 */
gfs_master_t *master_create(const char *chunk_dir) {
    if (!chunk_dir) {
        return NULL;
    }

    /* calloc zeroes the list heads and every other field. */
    gfs_master_t *m = calloc(1, sizeof *m);
    if (!m) {
        return NULL;
    }

    /* Bounded copy: reject a path that would be truncated instead of overflowing. */
    int n = snprintf(m->chunk_dir, sizeof m->chunk_dir, "%s", chunk_dir);
    if (n < 0 || (size_t)n >= sizeof m->chunk_dir) {
        free(m);
        return NULL;
    }

    if (pthread_mutex_init(&m->mutex, NULL) != 0) {
        free(m);
        return NULL;
    }
    m->running = 1;

    /* Best effort: the directories usually exist already on a second run. */
    (void)mkdir(chunk_dir, 0755);
    (void)mkdir("./metadata", 0755);

    printf("Master GFS Master启动,Chunk目录: %s\n", chunk_dir);
    return m;
}
/*
 * Register a ChunkServer with the master.
 *
 * If server_id is already known, the existing record is marked online and its
 * heartbeat time refreshed; host and port are left untouched. Otherwise a new
 * record is pushed onto the front of the server list with 1GB of free space.
 *
 * Returns 0 on success, -1 if an argument is NULL, server_id or host does not
 * fit in its 32-byte field, or memory is exhausted.
 */
int master_register_server(gfs_master_t *m, const char *server_id,
                           const char *host, int port) {
    if (!m || !server_id || !host) {
        return -1;
    }

    pthread_mutex_lock(&m->mutex);

    chunk_server_t *cs;
    for (cs = m->servers; cs; cs = cs->next) {
        if (strcmp(cs->server_id, server_id) == 0) {
            cs->status = 1;
            cs->last_heartbeat = time(NULL);
            pthread_mutex_unlock(&m->mutex);
            return 0;
        }
    }

    /* sizeof does not evaluate its operand, so cs being NULL here is fine. */
    if (strlen(server_id) >= sizeof cs->server_id ||
        strlen(host) >= sizeof cs->host) {
        pthread_mutex_unlock(&m->mutex);
        return -1;
    }

    cs = calloc(1, sizeof *cs);
    if (!cs) {
        pthread_mutex_unlock(&m->mutex);
        return -1;
    }
    snprintf(cs->server_id, sizeof cs->server_id, "%s", server_id);
    snprintf(cs->host, sizeof cs->host, "%s", host);
    cs->port = port;
    cs->status = 1;
    cs->disk_free = 1024 * 1024 * 1024; // 1GB
    cs->last_heartbeat = time(NULL);
    cs->next = m->servers;
    m->servers = cs;

    pthread_mutex_unlock(&m->mutex);

    printf("Master 注册ChunkServer: %s (%s:%d)\n", server_id, host, port);
    return 0;
}
/*
 * Record a heartbeat from the ChunkServer named server_id: refresh its
 * heartbeat time, store the reported free space and mark it online.
 * Unknown server ids are ignored.
 */
void master_heartbeat(gfs_master_t *m, const char *server_id, int disk_free) {
    pthread_mutex_lock(&m->mutex);

    for (chunk_server_t *node = m->servers; node != NULL; node = node->next) {
        if (strcmp(node->server_id, server_id) != 0) {
            continue;
        }
        /* Server ids are unique in the list, so stop at the first match. */
        node->last_heartbeat = time(NULL);
        node->disk_free = disk_free;
        node->status = 1;
        break;
    }

    pthread_mutex_unlock(&m->mutex);
}
/*
 * Create an empty file entry (no chunks) in the master's namespace.
 *
 * Returns 0 on success, -1 if an argument is NULL, the name does not fit in
 * file_metadata_t.filename, a file with that name already exists, or memory
 * is exhausted.
 */
int master_create_file(gfs_master_t *m, const char *filename) {
    if (!m || !filename) {
        return -1;
    }

    file_metadata_t *f;
    /* sizeof does not evaluate its operand; f is only used for its type here. */
    if (strlen(filename) >= sizeof f->filename) {
        return -1;
    }

    pthread_mutex_lock(&m->mutex);

    for (f = m->files; f; f = f->next) {
        if (strcmp(f->filename, filename) == 0) {
            pthread_mutex_unlock(&m->mutex);
            return -1; // already exists
        }
    }

    f = calloc(1, sizeof *f);
    if (!f) {
        pthread_mutex_unlock(&m->mutex);
        return -1;
    }
    snprintf(f->filename, sizeof f->filename, "%s", filename);
    f->chunks = NULL;
    f->chunk_count = 0;
    f->create_time = time(NULL);
    f->modify_time = f->create_time;
    f->next = m->files;
    m->files = f;

    pthread_mutex_unlock(&m->mutex);

    printf("Master 创建文件: %s\n", filename);
    return 0;
}
/*
 * Allocate the next chunk of an existing file and choose its replica servers.
 *
 * The server list is scanned once, front to back. Every server that is online
 * (status == 1) and has more than CHUNK_SIZE bytes free is taken, up to
 * REPLICATION_FACTOR distinct servers: the first becomes the primary, the
 * rest fill secondary_servers[] in order. CHUNK_SIZE is deducted from each
 * chosen server's disk_free. If fewer than REPLICATION_FACTOR servers
 * qualify, the unused secondary slots stay empty strings.
 *
 * The chunk id is "chunk_<filename>_<index>" with every '/' replaced by '_',
 * because ChunkServers use the id as a file name inside their chunk directory.
 *
 * Returns the new handle, which is linked into the file's chunk list and
 * therefore owned by the master. Returns NULL if an argument is NULL, the
 * file is unknown, the chunk id would not fit in chunk_id, no server
 * qualifies, or memory is exhausted.
 */
chunk_handle_t *master_allocate_chunk(gfs_master_t *m, const char *filename) {
    if (!m || !filename) {
        return NULL;
    }

    pthread_mutex_lock(&m->mutex);

    file_metadata_t *f = m->files;
    while (f && strcmp(f->filename, filename) != 0) {
        f = f->next;
    }
    if (!f) {
        pthread_mutex_unlock(&m->mutex);
        return NULL;
    }

    /* calloc leaves ref_count at 0 and every server slot an empty string. */
    chunk_handle_t *chunk = calloc(1, sizeof *chunk);
    if (!chunk) {
        pthread_mutex_unlock(&m->mutex);
        return NULL;
    }

    int n = snprintf(chunk->chunk_id, sizeof chunk->chunk_id,
                     "chunk_%s_%d", filename, f->chunk_count);
    if (n < 0 || (size_t)n >= sizeof chunk->chunk_id) {
        /* A truncated id would lose the index and collide with other chunks. */
        free(chunk);
        pthread_mutex_unlock(&m->mutex);
        return NULL;
    }
    for (char *p = chunk->chunk_id; *p; p++) {
        if (*p == '/') {
            *p = '_';
        }
    }
    chunk->version = 1;

    /*
     * Single pass over the list: each server is visited at most once, so the
     * loop always terminates and no server receives two replicas of a chunk.
     */
    int picked = 0;
    for (chunk_server_t *cs = m->servers;
         cs && picked < REPLICATION_FACTOR; cs = cs->next) {
        if (cs->status != 1 || cs->disk_free <= CHUNK_SIZE) {
            continue;
        }
        char *slot = (picked == 0) ? chunk->primary_server
                                   : chunk->secondary_servers[picked - 1];
        snprintf(slot, sizeof chunk->primary_server, "%s", cs->server_id);
        cs->disk_free -= CHUNK_SIZE; // simulated space reservation
        picked++;
    }
    if (picked == 0) {
        /* Without a primary the chunk cannot be written anywhere. */
        free(chunk);
        pthread_mutex_unlock(&m->mutex);
        return NULL;
    }

    chunk->next = f->chunks;
    f->chunks = chunk;
    f->chunk_count++;

    pthread_mutex_unlock(&m->mutex);

    printf("Master 分配Chunk: %s (主: %s)\n",
           chunk->chunk_id, chunk->primary_server);
    return chunk;
}
```
- ChunkServer实现
```c
/*
 * Create a ChunkServer node and its chunk directory.
 *
 * Returns a heap-allocated node (release with free() after destroying its
 * mutex), or NULL if an argument is NULL, server_id or chunk_dir does not fit
 * in its field, memory is exhausted, or the mutex cannot be initialized.
 */
gfs_chunkserver_t *chunkserver_create(const char *server_id,
                                      const char *chunk_dir, int port) {
    if (!server_id || !chunk_dir) {
        return NULL;
    }

    /* calloc zeroes heartbeat_thread and every other field. */
    gfs_chunkserver_t *cs = calloc(1, sizeof *cs);
    if (!cs) {
        return NULL;
    }

    /* Bounded copies: reject oversized input instead of overflowing the fields. */
    int id_len = snprintf(cs->server_id, sizeof cs->server_id, "%s", server_id);
    int dir_len = snprintf(cs->chunk_dir, sizeof cs->chunk_dir, "%s", chunk_dir);
    if (id_len < 0 || (size_t)id_len >= sizeof cs->server_id ||
        dir_len < 0 || (size_t)dir_len >= sizeof cs->chunk_dir) {
        free(cs);
        return NULL;
    }

    if (pthread_mutex_init(&cs->mutex, NULL) != 0) {
        free(cs);
        return NULL;
    }
    cs->port = port;
    cs->running = 1;

    /* Best effort: the directory usually exists already on a second run. */
    (void)mkdir(chunk_dir, 0755);

    printf("ChunkServer %s启动,端口: %d\n", server_id, port);
    return cs;
}
/*
 * Write data_len bytes of data to "<chunk_dir>/<chunk_id>.dat", replacing any
 * existing file of that name.
 *
 * Returns 0 only when every byte was written and the file closed cleanly.
 * Returns -1 on invalid arguments, a path longer than the local buffer, or
 * any I/O failure; a partially written file is removed in that case.
 */
int chunkserver_store_chunk(gfs_chunkserver_t *cs, const char *chunk_id,
                            const char *data, int data_len) {
    if (!cs || !chunk_id || data_len < 0 || (!data && data_len > 0)) {
        return -1;
    }

    char filepath[512];
    int n = snprintf(filepath, sizeof(filepath), "%s/%s.dat",
                     cs->chunk_dir, chunk_id);
    if (n < 0 || (size_t)n >= sizeof(filepath)) {
        return -1;
    }

    int rc = -1;
    pthread_mutex_lock(&cs->mutex);

    FILE *fp = fopen(filepath, "wb");
    if (fp) {
        size_t want = (size_t)data_len;
        size_t written = want ? fwrite(data, 1, want, fp) : 0;
        /* fclose flushes buffered data, so its result is part of the write. */
        int closed = fclose(fp);
        if (written == want && closed == 0) {
            rc = 0;
        } else {
            /* Do not leave a truncated chunk behind to be read back later. */
            (void)remove(filepath);
        }
    }

    pthread_mutex_unlock(&cs->mutex);
    return rc;
}
/*
 * Read the whole file "<chunk_dir>/<chunk_id>.dat" into data and store the
 * number of bytes read in *data_len.
 *
 * No buffer capacity is passed in, so the caller must supply a buffer large
 * enough for the entire chunk file. The data is NOT NUL-terminated.
 *
 * Returns 0 on success. Returns -1 on invalid arguments, a path longer than
 * the local buffer, a missing file, or any seek/read failure; *data_len is
 * left unchanged in that case.
 */
int chunkserver_read_chunk(gfs_chunkserver_t *cs, const char *chunk_id,
                           char *data, int *data_len) {
    if (!cs || !chunk_id || !data || !data_len) {
        return -1;
    }

    char filepath[512];
    int n = snprintf(filepath, sizeof(filepath), "%s/%s.dat",
                     cs->chunk_dir, chunk_id);
    if (n < 0 || (size_t)n >= sizeof(filepath)) {
        return -1;
    }

    int rc = -1;
    long size = -1;
    pthread_mutex_lock(&cs->mutex);

    FILE *fp = fopen(filepath, "rb");
    if (!fp) {
        goto out_unlock;
    }
    if (fseek(fp, 0, SEEK_END) != 0) {
        goto out_close;
    }
    /* ftell returns -1 on error; passing that to fread would request a huge read. */
    size = ftell(fp);
    if (size < 0 || size > INT_MAX) {
        goto out_close;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        goto out_close;
    }
    /* A short read means the chunk is incomplete: report failure, not partial data. */
    if (fread(data, 1, (size_t)size, fp) != (size_t)size) {
        goto out_close;
    }
    *data_len = (int)size;
    rc = 0;

out_close:
    fclose(fp);
out_unlock:
    pthread_mutex_unlock(&cs->mutex);
    return rc;
}
/*
 * Thread entry point for a ChunkServer's heartbeat loop.
 *
 * arg is the gfs_chunkserver_t to report for. While its `running` flag is
 * non-zero the loop sleeps HEARTBEAT_INTERVAL seconds and prints a heartbeat
 * line; nothing is sent to the master yet. Always returns NULL.
 */
void *chunkserver_heartbeat_thread(void *arg) {
    gfs_chunkserver_t *self = arg;

    for (;;) {
        if (!self->running) {
            break;
        }
        sleep(HEARTBEAT_INTERVAL);
        printf("ChunkServer %s 心跳\n", self->server_id);
        // A real implementation would send the heartbeat to the master over the network.
    }

    return NULL;
}
```
- 客户端操作
```c
/* Client-side handle: where the master lives plus this client's own id. */
typedef struct gfs_client {
    char master_host[32];  /* NUL-terminated master host name or address */
    int master_port;
    char server_id[32];    /* NUL-terminated client id, "client-<pid>" as set by client_create */
} gfs_client_t;
/*
 * Create a client bound to the given master address. The client id is
 * "client-<pid>" of the calling process.
 *
 * Returns a heap-allocated client (release with free()), or NULL if
 * master_host is NULL or does not fit in gfs_client_t.master_host, or memory
 * is exhausted.
 */
gfs_client_t *client_create(const char *master_host, int master_port) {
    if (!master_host) {
        return NULL;
    }

    gfs_client_t *c = calloc(1, sizeof *c);
    if (!c) {
        return NULL;
    }

    /* Bounded copy: reject a host that would be truncated instead of overflowing. */
    int n = snprintf(c->master_host, sizeof c->master_host, "%s", master_host);
    if (n < 0 || (size_t)n >= sizeof c->master_host) {
        free(c);
        return NULL;
    }
    c->master_port = master_port;

    /* pid_t is not guaranteed to be int, so convert explicitly for %d. */
    snprintf(c->server_id, sizeof(c->server_id), "client-%d", (int)getpid());
    return c;
}
/*
 * Write a NUL-terminated string to a file (stub).
 *
 * Only logs the file name and payload length; no data is sent anywhere.
 * The intended flow is:
 *   1. ask the master for the primary ChunkServer,
 *   2. write the data to the primary,
 *   3. let the primary replicate to the secondaries,
 *   4. return the acknowledgement.
 * Always returns 0.
 */
int client_write(gfs_client_t *c, const char *filename, const char *data) {
    (void)c; /* unused until the network path exists */

    size_t payload_len = strlen(data);
    printf("Client 写入文件: %s, 大小: %zu\n", filename, payload_len);

    return 0;
}
/*
 * Read a file into data (stub).
 *
 * Logs the file name and copies a fixed placeholder string, including its
 * terminating NUL, into data; the caller's buffer must hold at least that
 * many bytes. The intended flow is:
 *   1. ask the master for the chunk locations,
 *   2. read from the nearest ChunkServer,
 *   3. assemble and return the content.
 * Always returns 0.
 */
int client_read(gfs_client_t *c, const char *filename, char *data) {
    static const char placeholder[] = "GFS file content";

    (void)c; /* unused until the network path exists */

    printf("Client 读取文件: %s\n", filename);
    memcpy(data, placeholder, sizeof placeholder);

    return 0;
}
```
- 测试代码
```c
void test_gfs() {
printf("=== GFS分布式文件系统测试 ===\n\n");
// 创建Master
gfs_master_t *master = master_create("./chunks");
// 创建ChunkServer
gfs_chunkserver_t *cs1 = chunkserver_create("cs-1", "./cs1_data", 8001);
gfs_chunkserver_t *cs2 = chunkserver_create("cs-2", "./cs2_data", 8002);
gfs_chunkserver_t *cs3 = chunkserver_create("cs-3", "./cs3_data", 8003);
// 注册到Master
master_register_server(master, "cs-1", "127.0.0.1", 8001);
master_register_server(master, "cs-2", "127.0.0.1", 8002);
master_register_server(master, "cs-3", "127.0.0.1", 8003);
// 创建文件
master_create_file(master, "/user/data.txt");
// 分配Chunk
chunk_handle_t *chunk = master_allocate_chunk(master, "/user/data.txt");
if (chunk) {
// 存储数据到ChunkServer
char test_data\[\] = "Hello GFS!";
chunkserver_store_chunk(cs1, chunk->chunk_id, test_data, strlen(test_data));
chunkserver_store_chunk(cs2, chunk->chunk_id, test_data, strlen(test_data));
chunkserver_store_chunk(cs3, chunk->chunk_id, test_data, strlen(test_data));
printf("\n✅ 数据已存储到3个副本\n");
}
// 读取数据
char read_data1024;
int len;
chunkserver_read_chunk(cs1, chunk->chunk_id, read_data, &len);
printf("从ChunkServer读取: %s\n", read_data);
// 清理
cs1->running = 0;
cs2->running = 0;
cs3->running = 0;
free(cs1); free(cs2); free(cs3);
free(master);
}
/* Program entry point: run the GFS smoke test and exit with status 0. */
int main(void) {
    test_gfs();

    return 0;
}
```
三、编译和运行
```bash
gcc -o gfs gfs.c -lpthread
./gfs
```
四、GFS vs 其他分布式文件系统
| 特性 | GFS | HDFS | Ceph |
| --- | --- | --- | --- |
| Master | 单点 | 单点 | 无中心 |
| Chunk大小 | 64MB | 128MB | 4MB |
| 副本数 | 3 | 3 | 可配置 |
| 适用场景 | 大文件 | 大文件 | 通用 |
| 一致性 | 弱一致性 | 强一致性 | 强一致性 |
五、总结
通过这篇文章,你学会了:
· GFS的核心架构(Master + ChunkServer)
· 文件分块和副本机制
· 元数据管理
· 心跳和健康检查
· 读写流程
GFS是分布式存储系统的开创者。掌握它,你就理解了HDFS、Ceph等系统的设计基础。
下一篇预告:《从零实现一个分布式缓存:Redis Cluster的核心设计》
评论区分享一下你对分布式文件系统的理解~