从零实现一个分布式文件系统:GFS的核心设计

前言

你有没有想过:Google是怎么存储EB级别的数据的?GFS(Google File System)是Google早期分布式存储的基石,曾支撑搜索、YouTube、Gmail等众多核心服务。

今天我们用C语言从零实现GFS的核心设计:

· Master(元数据管理)

· ChunkServer(数据存储)

· 文件分块(Chunk)

· 副本复制(Replication)

· 心跳与健康检查

· 读取与写入流程


一、GFS核心原理

  1. 架构图

```

┌─────────────────────────────────────────────────────────────┐

│ Client │

└─────────────────────────────────────────────────────────────┘

│ │

▼ ▼

┌─────────────────────────────────────────────────────────────┐

│ Master │

│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │

│ │ 文件元数据 │ │ Chunk位置 │ │ 操作日志 │ │

│ │ 命名空间 │ │ (缓存) │ │ (WAL) │ │

│ └─────────────┘ └─────────────┘ └─────────────┘ │

└─────────────────────────────────────────────────────────────┘

│ │

▼ ▼

┌─────────────────────────────────────────────────────────────┐

│ ChunkServer 集群 │

│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │

│ │Chunk A │ │Chunk B │ │Chunk C │ │Chunk A │ │

│ │(主副本) │ │(副本1) │ │(副本2) │ │(主副本) │ │

│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │

└─────────────────────────────────────────────────────────────┘

```

  2. 核心概念

| 概念 | 说明 |
| --- | --- |
| Master | 单点,管理元数据和Chunk位置 |
| Chunk | 固定大小(64MB)的数据块 |
| ChunkServer | 存储Chunk的节点 |
| 副本 | 每个Chunk默认3个副本 |
| 租约 | 主副本的写入权限 |

  3. 读写流程

```

写入流程:

Client → Master(获取主副本位置)

→ 主副本(写入数据)

→ 二级副本(同步数据)

→ Client(确认)

读取流程:

Client → Master(获取Chunk位置)

→ ChunkServer(读取数据)

→ Client(返回数据)

```


二、完整代码实现

  1. 基础数据结构

```c

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <arpa/inet.h>
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/stat.h>
#include <unistd.h>

/* Buffer sizes and tunables shared by the Master and ChunkServer code. */
#define MAX_FILENAME 256
#define MAX_CHUNK_ID 64
#define MAX_CHUNK_SERVERS 10
/* Parenthesized so the value stays intact when the macro is embedded in a
 * larger expression such as `x / CHUNK_SIZE`. */
#define CHUNK_SIZE (64 * 1024 * 1024) /* 64MB */
#define REPLICATION_FACTOR 3
#define HEARTBEAT_INTERVAL 5 /* passed to sleep() by the heartbeat loop */

/*
 * Chunk handle: the Master's record of one chunk and where its replicas live.
 * Handles belonging to the same file are chained through `next`.
 */
typedef struct chunk_handle {
    char chunk_id[MAX_CHUNK_ID];
    char primary_server[32];                        /* server_id of the primary replica */
    char secondary_servers[REPLICATION_FACTOR][32]; /* server_ids of the other replicas */
    int version;
    int ref_count;
    struct chunk_handle *next;                      /* next chunk of the same file */
} chunk_handle_t;

/*
 * File metadata: one node in the Master's singly linked list of files.
 */
typedef struct file_metadata {
    char filename[MAX_FILENAME];
    chunk_handle_t *chunks;     /* head of this file's chunk list */
    int chunk_count;            /* number of chunks allocated so far */
    time_t create_time;
    time_t modify_time;
    struct file_metadata *next;
} file_metadata_t;

/*
 * ChunkServer as seen by the Master: one node in the Master's server list.
 */
typedef struct chunk_server {
    char server_id[32];
    char host[32];
    int port;
    int disk_free;              /* reduced by CHUNK_SIZE for each chunk placed here */
    int status;                 /* 0: offline, 1: online, 2: busy */
    time_t last_heartbeat;
    struct chunk_server *next;
} chunk_server_t;

/*
 * Master state. `mutex` is taken by every master_* function before it
 * touches the `files` or `servers` lists.
 */
typedef struct gfs_master {
    file_metadata_t *files;
    chunk_server_t *servers;
    char chunk_dir[256];
    pthread_mutex_t mutex;
    int running;
    pthread_t heartbeat_thread;
} gfs_master_t;

/*
 * ChunkServer node: the storage process itself. `mutex` serializes the
 * chunk file reads and writes performed under `chunk_dir`.
 */
typedef struct gfs_chunkserver {
    char server_id[32];
    char chunk_dir[256];
    int port;
    pthread_mutex_t mutex;
    int running;                /* heartbeat loop keeps going while non-zero */
    pthread_t heartbeat_thread;
} gfs_chunkserver_t;

```

  2. Master实现

```c

/*
 * Create a Master whose chunk directory is `chunk_dir`.
 *
 * Also creates `chunk_dir` and "./metadata" on disk; a directory that already
 * exists is not an error, and any other mkdir failure is reported on stderr
 * without aborting.
 *
 * Returns a heap-allocated Master owned by the caller, or NULL when
 * `chunk_dir` is NULL, does not fit in the Master's chunk_dir buffer, or
 * allocation / mutex initialization fails.
 */
gfs_master_t *master_create(const char *chunk_dir) {
    if (chunk_dir == NULL) {
        return NULL;
    }

    /* calloc zeroes the list heads and the unused thread handle. */
    gfs_master_t *m = calloc(1, sizeof *m);
    if (m == NULL) {
        return NULL;
    }

    /* Reject rather than truncate: a truncated path names a different directory. */
    if (strlen(chunk_dir) >= sizeof m->chunk_dir) {
        free(m);
        return NULL;
    }
    snprintf(m->chunk_dir, sizeof m->chunk_dir, "%s", chunk_dir);
    m->running = 1;

    if (pthread_mutex_init(&m->mutex, NULL) != 0) {
        free(m);
        return NULL;
    }

    if (mkdir(chunk_dir, 0755) != 0 && errno != EEXIST) {
        perror("mkdir chunk_dir");
    }
    if (mkdir("./metadata", 0755) != 0 && errno != EEXIST) {
        perror("mkdir ./metadata");
    }

    printf("Master GFS Master启动,Chunk目录: %s\n", chunk_dir);
    return m;
}

/*
 * Register a ChunkServer with the Master.
 *
 * If `server_id` is already registered, the existing entry is marked online
 * and its heartbeat timestamp refreshed; its host and port are left as they
 * were. Otherwise a new entry is pushed onto the head of the server list
 * with 1GB of free space.
 *
 * Returns 0 on success, -1 when an argument is NULL, `server_id` or `host`
 * does not fit in its 32-byte field, or allocation fails.
 */
int master_register_server(gfs_master_t *m, const char *server_id,
                           const char *host, int port) {
    if (m == NULL || server_id == NULL || host == NULL) {
        return -1;
    }

    pthread_mutex_lock(&m->mutex);

    for (chunk_server_t *known = m->servers; known != NULL; known = known->next) {
        if (strcmp(known->server_id, server_id) == 0) {
            known->status = 1;
            known->last_heartbeat = time(NULL);
            pthread_mutex_unlock(&m->mutex);
            return 0;
        }
    }

    chunk_server_t *cs = calloc(1, sizeof *cs);
    if (cs == NULL ||
        strlen(server_id) >= sizeof cs->server_id ||
        strlen(host) >= sizeof cs->host) {
        free(cs);
        pthread_mutex_unlock(&m->mutex);
        return -1;
    }

    snprintf(cs->server_id, sizeof cs->server_id, "%s", server_id);
    snprintf(cs->host, sizeof cs->host, "%s", host);
    cs->port = port;
    cs->status = 1;
    cs->disk_free = 1024 * 1024 * 1024; /* 1GB */
    cs->last_heartbeat = time(NULL);
    cs->next = m->servers;
    m->servers = cs;

    pthread_mutex_unlock(&m->mutex);

    printf("Master 注册ChunkServer: %s (%s:%d)\n", server_id, host, port);
    return 0;
}

/*
 * Record a heartbeat from the ChunkServer named `server_id`.
 *
 * The first matching entry in the Master's server list is marked online and
 * gets its free-space figure and heartbeat timestamp updated. An unknown
 * `server_id` is ignored.
 */
void master_heartbeat(gfs_master_t *m, const char *server_id, int disk_free) {
    chunk_server_t *node;

    pthread_mutex_lock(&m->mutex);

    for (node = m->servers; node != NULL; node = node->next) {
        if (strcmp(node->server_id, server_id) != 0) {
            continue;
        }
        node->last_heartbeat = time(NULL);
        node->disk_free = disk_free;
        node->status = 1;
        break;
    }

    pthread_mutex_unlock(&m->mutex);
}

/*
 * Create an empty file entry named `filename` in the Master's namespace.
 *
 * The new entry has no chunks and is pushed onto the head of the file list.
 *
 * Returns 0 on success. Returns -1 when the file already exists, when an
 * argument is NULL, when `filename` does not fit in the metadata's filename
 * buffer, or when allocation fails.
 */
int master_create_file(gfs_master_t *m, const char *filename) {
    if (m == NULL || filename == NULL) {
        return -1;
    }

    /* Build the node before taking the lock so the critical section stays short. */
    file_metadata_t *created = calloc(1, sizeof *created);
    if (created == NULL) {
        return -1;
    }
    if (strlen(filename) >= sizeof created->filename) {
        free(created);
        return -1;
    }
    snprintf(created->filename, sizeof created->filename, "%s", filename);
    created->create_time = time(NULL);
    created->modify_time = created->create_time;

    pthread_mutex_lock(&m->mutex);

    for (file_metadata_t *f = m->files; f != NULL; f = f->next) {
        if (strcmp(f->filename, filename) == 0) {
            pthread_mutex_unlock(&m->mutex);
            free(created);
            return -1; /* already exists */
        }
    }

    created->next = m->files;
    m->files = created;

    pthread_mutex_unlock(&m->mutex);

    printf("Master 创建文件: %s\n", filename);
    return 0;
}

/*
 * Allocate the next chunk of `filename` and choose the servers that hold it.
 *
 * The chunk id is "chunk_<filename>_<index>" with every '/' in the file name
 * replaced by '_', so a ChunkServer can use the id directly as a file name
 * inside its chunk directory.
 *
 * Replica placement walks the server list once and takes the first
 * REPLICATION_FACTOR servers that are online (status == 1) and have more than
 * CHUNK_SIZE free; the first becomes the primary. Each chosen server has
 * CHUNK_SIZE deducted from its free space. A server is never chosen twice
 * for the same chunk, so with fewer eligible servers than
 * REPLICATION_FACTOR the chunk is created under-replicated and the unused
 * secondary slots stay empty strings.
 *
 * Returns the new handle, which is linked into the file's chunk list and
 * therefore owned by the Master, or NULL when an argument is NULL, the file
 * is unknown, the id does not fit in the chunk_id buffer, no server is
 * eligible, or allocation fails.
 */
chunk_handle_t *master_allocate_chunk(gfs_master_t *m, const char *filename) {
    if (m == NULL || filename == NULL) {
        return NULL;
    }

    chunk_handle_t *chunk = NULL;
    file_metadata_t *f;
    int placed = 0;
    int id_len;

    pthread_mutex_lock(&m->mutex);

    /* Locate the file. */
    for (f = m->files; f != NULL; f = f->next) {
        if (strcmp(f->filename, filename) == 0) {
            break;
        }
    }
    if (f == NULL) {
        goto out;
    }

    /* calloc leaves every replica slot as an empty string and ref_count at 0. */
    chunk = calloc(1, sizeof *chunk);
    if (chunk == NULL) {
        goto out;
    }

    id_len = snprintf(chunk->chunk_id, sizeof chunk->chunk_id,
                      "chunk_%s_%d", filename, f->chunk_count);
    if (id_len < 0 || (size_t)id_len >= sizeof chunk->chunk_id) {
        /* A truncated id would lose the index suffix and collide with other chunks. */
        free(chunk);
        chunk = NULL;
        goto out;
    }
    for (char *p = chunk->chunk_id; *p != '\0'; p++) {
        if (*p == '/') {
            *p = '_';
        }
    }
    chunk->version = 1;

    /* Single pass over the list: terminates even when no server qualifies. */
    for (chunk_server_t *cs = m->servers;
         cs != NULL && placed < REPLICATION_FACTOR;
         cs = cs->next) {
        if (cs->status != 1 || cs->disk_free <= CHUNK_SIZE) {
            continue;
        }
        char *slot = (placed == 0) ? chunk->primary_server
                                   : chunk->secondary_servers[placed - 1];
        snprintf(slot, sizeof chunk->primary_server, "%s", cs->server_id);
        cs->disk_free -= CHUNK_SIZE; /* simulated space reservation */
        placed++;
    }
    if (placed == 0) {
        free(chunk);
        chunk = NULL;
        goto out;
    }

    /* Attach to the file; relies on chunk_handle_t carrying a `next` link. */
    chunk->next = f->chunks;
    f->chunks = chunk;
    f->chunk_count++;

out:
    pthread_mutex_unlock(&m->mutex);

    if (chunk != NULL) {
        printf("Master 分配Chunk: %s (主: %s)\n",
               chunk->chunk_id, chunk->primary_server);
    }
    return chunk;
}

```

  3. ChunkServer实现

```c

/*
 * Create a ChunkServer node that stores its chunks under `chunk_dir`.
 *
 * Creates `chunk_dir` on disk; a directory that already exists is not an
 * error, and any other mkdir failure is reported on stderr without aborting.
 *
 * Returns a heap-allocated node owned by the caller, or NULL when an
 * argument is NULL, `server_id` or `chunk_dir` does not fit in its buffer,
 * or allocation / mutex initialization fails.
 */
gfs_chunkserver_t *chunkserver_create(const char *server_id,
                                      const char *chunk_dir, int port) {
    if (server_id == NULL || chunk_dir == NULL) {
        return NULL;
    }

    /* calloc zeroes the thread handle, which is not started here. */
    gfs_chunkserver_t *cs = calloc(1, sizeof *cs);
    if (cs == NULL) {
        return NULL;
    }
    if (strlen(server_id) >= sizeof cs->server_id ||
        strlen(chunk_dir) >= sizeof cs->chunk_dir) {
        free(cs);
        return NULL;
    }

    snprintf(cs->server_id, sizeof cs->server_id, "%s", server_id);
    snprintf(cs->chunk_dir, sizeof cs->chunk_dir, "%s", chunk_dir);
    cs->port = port;
    cs->running = 1;

    if (pthread_mutex_init(&cs->mutex, NULL) != 0) {
        free(cs);
        return NULL;
    }

    if (mkdir(chunk_dir, 0755) != 0 && errno != EEXIST) {
        perror("mkdir chunk_dir");
    }

    printf("ChunkServer %s启动,端口: %d\n", server_id, port);
    return cs;
}

/*
 * Write `data_len` bytes of `data` to "<chunk_dir>/<chunk_id>.dat",
 * replacing any previous contents of that file.
 *
 * Returns 0 only when every byte was written and the file closed cleanly.
 * Returns -1 on invalid arguments, a path that does not fit the local
 * buffer, or any open / write / close failure.
 */
int chunkserver_store_chunk(gfs_chunkserver_t *cs, const char *chunk_id,
                            const char *data, int data_len) {
    if (cs == NULL || chunk_id == NULL || data_len < 0 ||
        (data == NULL && data_len > 0)) {
        return -1;
    }

    char filepath[512];
    int path_len = snprintf(filepath, sizeof filepath, "%s/%s.dat",
                            cs->chunk_dir, chunk_id);
    if (path_len < 0 || (size_t)path_len >= sizeof filepath) {
        return -1;
    }

    int rc = -1;

    pthread_mutex_lock(&cs->mutex);

    FILE *fp = fopen(filepath, "wb");
    if (fp != NULL) {
        size_t want = (size_t)data_len;
        size_t written = (want > 0) ? fwrite(data, 1, want, fp) : 0;
        /* fclose flushes buffered data, so its result is part of the write. */
        int close_rc = fclose(fp);
        if (written == want && close_rc == 0) {
            rc = 0;
        }
    }

    pthread_mutex_unlock(&cs->mutex);

    return rc;
}

/*
 * Read the whole of "<chunk_dir>/<chunk_id>.dat" into `data`.
 *
 * The interface carries no buffer size, so the caller must supply a buffer
 * large enough for the entire chunk file. The bytes are copied verbatim and
 * are NOT NUL-terminated; use `*data_len` to delimit them.
 *
 * On success returns 0 and stores the byte count in `*data_len`. Returns -1,
 * leaving `*data_len` untouched, on invalid arguments, a path that does not
 * fit the local buffer, a file size that does not fit in an int, or any
 * open / seek / read failure.
 */
int chunkserver_read_chunk(gfs_chunkserver_t *cs, const char *chunk_id,
                           char *data, int *data_len) {
    if (cs == NULL || chunk_id == NULL || data == NULL || data_len == NULL) {
        return -1;
    }

    char filepath[512];
    int path_len = snprintf(filepath, sizeof filepath, "%s/%s.dat",
                            cs->chunk_dir, chunk_id);
    if (path_len < 0 || (size_t)path_len >= sizeof filepath) {
        return -1;
    }

    int rc = -1;
    long size;
    FILE *fp;

    pthread_mutex_lock(&cs->mutex);

    fp = fopen(filepath, "rb");
    if (fp == NULL) {
        goto out_unlock;
    }

    if (fseek(fp, 0, SEEK_END) != 0) {
        goto out_close;
    }
    size = ftell(fp); /* -1 on error */
    if (size < 0 || size > INT_MAX) {
        goto out_close;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        goto out_close;
    }

    /* A short read means the caller would see partial data; report it. */
    if (fread(data, 1, (size_t)size, fp) != (size_t)size) {
        goto out_close;
    }

    *data_len = (int)size;
    rc = 0;

out_close:
    fclose(fp);
out_unlock:
    pthread_mutex_unlock(&cs->mutex);
    return rc;
}

/*
 * Thread entry point for a ChunkServer's heartbeat loop.
 *
 * `arg` is the gfs_chunkserver_t the thread belongs to. While its `running`
 * flag is non-zero the loop sleeps HEARTBEAT_INTERVAL and then prints a
 * heartbeat line. Always returns NULL.
 */
void *chunkserver_heartbeat_thread(void *arg) {
    gfs_chunkserver_t *self = arg;

    for (;;) {
        if (!self->running) {
            break;
        }
        sleep(HEARTBEAT_INTERVAL);
        printf("ChunkServer %s 心跳\n", self->server_id);
        /* A real implementation would send the heartbeat to the Master over the network. */
    }

    return NULL;
}

```

  4. 客户端操作

```c

/*
 * GFS client: where the Master lives plus an identifier for this client.
 */
typedef struct gfs_client {
    char master_host[32];
    int master_port;
    char server_id[32]; /* client_create fills this with "client-<pid>" */
} gfs_client_t;

/*
 * Create a client that talks to the Master at `master_host`:`master_port`.
 *
 * The client's identifier is set to "client-<pid>".
 *
 * Returns a heap-allocated client owned by the caller, or NULL when
 * `master_host` is NULL or does not fit in the client's 32-byte host field,
 * or when allocation fails.
 */
gfs_client_t *client_create(const char *master_host, int master_port) {
    if (master_host == NULL) {
        return NULL;
    }

    gfs_client_t *c = calloc(1, sizeof *c);
    if (c == NULL) {
        return NULL;
    }
    if (strlen(master_host) >= sizeof c->master_host) {
        free(c);
        return NULL;
    }

    snprintf(c->master_host, sizeof c->master_host, "%s", master_host);
    c->master_port = master_port;
    snprintf(c->server_id, sizeof c->server_id, "client-%d", (int)getpid());
    return c;
}

/*
 * Write `data` (a NUL-terminated string) to `filename`.
 *
 * Placeholder: only logs the file name and payload length, then returns 0.
 * The intended flow is:
 *   1. ask the Master for the primary ChunkServer's location
 *   2. write the data to the primary ChunkServer
 *   3. the primary synchronizes the data to the replicas
 *   4. return the acknowledgement
 */
int client_write(gfs_client_t *c, const char *filename, const char *data) {
    size_t payload_len = strlen(data);

    (void)c; /* not consulted by the placeholder */
    printf("Client 写入文件: %s, 大小: %zu\n", filename, payload_len);

    return 0;
}

/*
 * Read `filename` into `data`.
 *
 * Placeholder: logs the file name, copies a fixed NUL-terminated string
 * into `data` and returns 0. The caller's buffer must hold at least that
 * string. The intended flow is:
 *   1. ask the Master for the chunk locations
 *   2. read from the nearest ChunkServer
 *   3. assemble and return the contents
 */
int client_read(gfs_client_t *c, const char *filename, char *data) {
    static const char placeholder[] = "GFS file content";

    (void)c; /* not consulted by the placeholder */
    printf("Client 读取文件: %s\n", filename);

    /* sizeof includes the terminating NUL, matching what strcpy would write. */
    memcpy(data, placeholder, sizeof placeholder);
    return 0;
}

```

  5. 测试代码

```c

void test_gfs() {

printf("=== GFS分布式文件系统测试 ===\n\n");

// 创建Master

gfs_master_t *master = master_create("./chunks");

// 创建ChunkServer

gfs_chunkserver_t *cs1 = chunkserver_create("cs-1", "./cs1_data", 8001);

gfs_chunkserver_t *cs2 = chunkserver_create("cs-2", "./cs2_data", 8002);

gfs_chunkserver_t *cs3 = chunkserver_create("cs-3", "./cs3_data", 8003);

// 注册到Master

master_register_server(master, "cs-1", "127.0.0.1", 8001);

master_register_server(master, "cs-2", "127.0.0.1", 8002);

master_register_server(master, "cs-3", "127.0.0.1", 8003);

// 创建文件

master_create_file(master, "/user/data.txt");

// 分配Chunk

chunk_handle_t *chunk = master_allocate_chunk(master, "/user/data.txt");

if (chunk) {

// 存储数据到ChunkServer

char test_data\[\] = "Hello GFS!";

chunkserver_store_chunk(cs1, chunk->chunk_id, test_data, strlen(test_data));

chunkserver_store_chunk(cs2, chunk->chunk_id, test_data, strlen(test_data));

chunkserver_store_chunk(cs3, chunk->chunk_id, test_data, strlen(test_data));

printf("\n✅ 数据已存储到3个副本\n");

}

// 读取数据

char read_data1024;

int len;

chunkserver_read_chunk(cs1, chunk->chunk_id, read_data, &len);

printf("从ChunkServer读取: %s\n", read_data);

// 清理

cs1->running = 0;

cs2->running = 0;

cs3->running = 0;

free(cs1); free(cs2); free(cs3);

free(master);

}

/* Program entry point: runs the GFS smoke test and reports success. */
int main() {
    test_gfs();

    return EXIT_SUCCESS;
}

```


三、编译和运行

```bash

gcc -o gfs gfs.c -lpthread

./gfs

```


四、GFS vs 其他分布式文件系统

| 特性 | GFS | HDFS | Ceph |
| --- | --- | --- | --- |
| Master | 单点 | 单点 | 无中心 |
| Chunk大小 | 64MB | 128MB | 4MB |
| 副本数 | 3 | 3 | 可配置 |
| 适用场景 | 大文件 | 大文件 | 通用 |
| 一致性 | 弱一致性 | 强一致性 | 强一致性 |


五、总结

通过这篇文章,你学会了:

· GFS的核心架构(Master + ChunkServer)

· 文件分块和副本机制

· 元数据管理

· 心跳和健康检查

· 读写流程

GFS是分布式存储系统的开创者。掌握它,你就理解了HDFS、Ceph等系统的设计基础。

下一篇预告:《从零实现一个分布式缓存:Redis Cluster的核心设计》


评论区分享一下你对分布式文件系统的理解~