手写一个线程安全的哈希表：从原理到实战

前言

哈希表是面试中最常考、工程中最常用的数据结构。但很多人的理解停留在"数组+链表"这个层面。

今天，我们不吹理论，从头手写一个生产级的哈希表：

· 支持动态扩容

· 线程安全（读写锁）

· 泛型存储（void*）

· 完整的测试用例

一、哈希表的核心原理

基本结构

```

┌─────────────────────────────────────┐

│ 哈希表 (HashTable) │

├─────┬─────┬─────┬─────┬─────┬─────┬─────┤

│ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │

└──┬──┴──┬──┴─────┴─────┴──┬──┴──┬──┴─────┘

│ │ │ │

▼ ▼ ▼ ▼

┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐

│ key │ │ key │ │ key │ │ key │

│ val │ │ val │ │ val │ │ val │

│ next├─►│next│ │next├─►│next│

└─────┘ └─────┘ └─────┘ └─────┘

```

· 数组：每个槽位是一个链表的头

· 链表：解决哈希冲突（拉链法）

· 哈希函数：把key映射到数组下标

时间复杂度

| 操作 | 平均 | 最坏 |
| --- | --- | --- |
| 插入 | O(1) | O(n) |
| 查找 | O(1) | O(n) |
| 删除 | O(1) | O(n) |

最坏情况发生在哈希冲突严重时（所有key都映射到同一个槽位）。

二、完整代码实现

节点定义

```c

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <pthread.h>

// One entry in a bucket's singly linked collision chain.
// The table's put routine stores a strdup()'d copy of the key, and the
// remove/destroy routines release both key and value with free().

typedef struct hash_node {

char *key; // entry key (C string)

void *value; // entry value (untyped pointer)

struct hash_node *next; // next node in the same chain

} hash_node_t;

// The hash table itself: an array of chain heads, bookkeeping counters and
// a single table-wide read-write lock.

typedef struct {

hash_node_t **buckets; // bucket array (one chain head per slot)

int size; // number of buckets

int count; // number of key/value pairs currently stored

// Read-write lock taken by the table operations.
// NOTE(review): the original comment said "reader-preferring"; the lock is
// initialised with default attributes, so the real policy is
// platform-dependent — confirm before relying on it.
pthread_rwlock_t rwlock;

} hash_table_t;

```

哈希函数

```c

// Classic DJB2 string hash, reduced to a bucket index.
//
// str:        NUL-terminated key to hash (must not be NULL).
// table_size: number of buckets; expected to be positive.
//
// Returns an index in [0, table_size). A non-positive table_size yields 0
// instead of dividing by zero.
unsigned int hash(const char *str, int table_size) {
    if (table_size <= 0) {
        return 0;
    }

    // Read the key as unsigned bytes so that bytes >= 0x80 (e.g. UTF-8 text)
    // hash the same whether plain char is signed or unsigned.
    const unsigned char *p = (const unsigned char *)str;
    unsigned int acc = 5381;

    while (*p != '\0') {
        acc = ((acc << 5) + acc) + *p; // acc * 33 + byte
        p++;
    }

    return acc % (unsigned int)table_size;
}

```

创建/销毁哈希表

```c

// Create an empty hash table.
//
// size: initial number of buckets; must be positive.
//
// Returns a heap-allocated table, or NULL when size is not positive, an
// allocation fails, or the read-write lock cannot be initialised.
// Release the table with hash_table_destroy().
hash_table_t *hash_table_create(int size) {
    // A zero or negative bucket count cannot index anything and would be
    // converted to a bogus element count for calloc.
    if (size <= 0) {
        return NULL;
    }

    hash_table_t *ht = malloc(sizeof *ht);
    if (!ht) {
        return NULL;
    }

    ht->size = size;
    ht->count = 0;
    ht->buckets = calloc((size_t)size, sizeof *ht->buckets);
    if (!ht->buckets) {
        free(ht);
        return NULL;
    }

    // pthread_rwlock_init reports failure through its return value.
    if (pthread_rwlock_init(&ht->rwlock, NULL) != 0) {
        free(ht->buckets);
        free(ht);
        return NULL;
    }

    return ht;
}

// Destroy a hash table and everything it owns.
//
// Frees every node together with its key and value (so stored values must
// be pointers obtained from malloc-family functions, or NULL), then the
// bucket array, the lock and the table itself. Passing NULL is a no-op.
//
// The caller must guarantee that no other thread can still reach the table:
// the write lock taken here only waits for operations already in progress.
void hash_table_destroy(hash_table_t *ht) {
    if (!ht) {
        return;
    }

    pthread_rwlock_wrlock(&ht->rwlock);

    for (int i = 0; i < ht->size; i++) {
        hash_node_t *node = ht->buckets[i];
        while (node) {
            hash_node_t *next = node->next; // read before the node is freed
            free(node->key);
            free(node->value);
            free(node);
            node = next;
        }
    }

    free(ht->buckets);

    // A lock must be unlocked before it is destroyed.
    pthread_rwlock_unlock(&ht->rwlock);
    pthread_rwlock_destroy(&ht->rwlock);

    free(ht);
}

```

插入/更新

```c

// Declared here because put triggers a resize and must see a prototype.
int hash_table_resize(hash_table_t *ht, int new_size);

// Insert a key/value pair, or replace the value stored under an existing key.
//
// ht:    table to modify.
// key:   NUL-terminated key; the table stores its own copy.
// value: heap pointer (or NULL) whose ownership passes to the table on
//        success. When the key already exists the previous value is released
//        with free(), so the caller must not free it again.
//
// Returns 0 on success, -1 if ht or key is NULL or an allocation fails.
// On -1 the table is unchanged and the caller still owns value.
int hash_table_put(hash_table_t *ht, const char *key, void *value) {
    if (!ht || !key) {
        return -1;
    }

    pthread_rwlock_wrlock(&ht->rwlock);

    // Hash under the lock: ht->size changes when another thread resizes, and
    // an index computed from a stale size would point at the wrong bucket.
    unsigned int index = hash(key, ht->size);

    // Existing key: swap the value in place.
    for (hash_node_t *cur = ht->buckets[index]; cur; cur = cur->next) {
        if (strcmp(cur->key, key) == 0) {
            // Re-putting the very same pointer must not free it.
            if (cur->value != value) {
                free(cur->value);
                cur->value = value;
            }
            pthread_rwlock_unlock(&ht->rwlock);
            return 0;
        }
    }

    // New key: build the node completely before linking it in.
    hash_node_t *node = malloc(sizeof *node);
    if (!node) {
        pthread_rwlock_unlock(&ht->rwlock);
        return -1;
    }

    node->key = strdup(key);
    if (!node->key) {
        free(node);
        pthread_rwlock_unlock(&ht->rwlock);
        return -1;
    }

    node->value = value;
    node->next = ht->buckets[index]; // push onto the head of the chain
    ht->buckets[index] = node;
    ht->count++;

    // Decide whether to grow (load factor > 0.75) while count and size are
    // still protected by the lock.
    int grow_to = 0;
    if ((float)ht->count / ht->size > 0.75) {
        grow_to = ht->size * 2;
    }

    pthread_rwlock_unlock(&ht->rwlock);

    // hash_table_resize takes the write lock itself, so it must be called
    // unlocked. A failed or already-superseded resize does not fail the put.
    if (grow_to > 0) {
        hash_table_resize(ht, grow_to);
    }

    return 0;
}

```

查找

```c

// Look up the value stored under a key.
//
// Returns the stored pointer, or NULL if ht or key is NULL or the key is
// absent (a stored NULL value is indistinguishable from "not found").
//
// The returned pointer is still owned by the table: a later put on the same
// key, a remove, or destroy frees it, so callers sharing the table across
// threads must coordinate before dereferencing it.
void *hash_table_get(hash_table_t *ht, const char *key) {
    if (!ht || !key) {
        return NULL;
    }

    void *found = NULL;

    pthread_rwlock_rdlock(&ht->rwlock);

    // Hash under the lock so the index matches the current bucket array even
    // if another thread resized the table just before.
    unsigned int index = hash(key, ht->size);

    for (hash_node_t *cur = ht->buckets[index]; cur; cur = cur->next) {
        if (strcmp(cur->key, key) == 0) {
            found = cur->value;
            break;
        }
    }

    pthread_rwlock_unlock(&ht->rwlock);

    return found;
}

```

删除

```c

// Remove a key and free its node, key copy and value.
//
// Returns 0 if the key was found and removed, -1 if ht or key is NULL or the
// key is not present.
int hash_table_remove(hash_table_t *ht, const char *key) {
    if (!ht || !key) {
        return -1;
    }

    int rc = -1; // stays -1 when the key is not found

    pthread_rwlock_wrlock(&ht->rwlock);

    // Hash under the lock so a concurrent resize cannot leave us searching
    // the wrong bucket.
    unsigned int index = hash(key, ht->size);

    // `link` points at whichever pointer currently leads to the node being
    // examined (the bucket head or a predecessor's next field), so unlinking
    // needs no special case for the first node.
    hash_node_t **link = &ht->buckets[index];
    while (*link) {
        hash_node_t *node = *link;
        if (strcmp(node->key, key) == 0) {
            *link = node->next;
            free(node->key);
            free(node->value);
            free(node);
            ht->count--;
            rc = 0;
            break;
        }
        link = &node->next;
    }

    pthread_rwlock_unlock(&ht->rwlock);

    return rc;
}

```

动态扩容

```c

// Grow the table to new_size buckets and rehash every node into them.
//
// Nodes are relinked, not copied, so keys and values keep their addresses.
//
// Returns 0 on success, -1 if ht is NULL, new_size is not larger than the
// current bucket count, or the new bucket array cannot be allocated. On -1
// the table is left untouched.
int hash_table_resize(hash_table_t *ht, int new_size) {
    if (!ht) {
        return -1;
    }

    pthread_rwlock_wrlock(&ht->rwlock);

    // Compare against the size under the lock: another thread may have
    // resized already, in which case this request is stale.
    if (new_size <= ht->size) {
        pthread_rwlock_unlock(&ht->rwlock);
        return -1;
    }

    // Allocate first so a failure needs no rollback.
    hash_node_t **new_buckets = calloc((size_t)new_size, sizeof *new_buckets);
    if (!new_buckets) {
        pthread_rwlock_unlock(&ht->rwlock);
        return -1;
    }

    // Move every node onto the head of its chain in the new array.
    for (int i = 0; i < ht->size; i++) {
        hash_node_t *node = ht->buckets[i];
        while (node) {
            hash_node_t *next = node->next; // relinking overwrites node->next
            unsigned int new_index = hash(node->key, new_size);
            node->next = new_buckets[new_index];
            new_buckets[new_index] = node;
            node = next;
        }
    }

    free(ht->buckets);
    ht->buckets = new_buckets;
    ht->size = new_size;

    pthread_rwlock_unlock(&ht->rwlock);

    return 0;
}

```

遍历和统计

```c

// Call `callback` once for every key/value pair in the table.
//
// ht:       table to walk; NULL makes the call a no-op.
// callback: invoked as callback(key, value, userdata); NULL makes the call
//           a no-op.
// userdata: passed through to the callback unchanged.
//
// Entries are visited bucket by bucket, so the order follows the hash
// layout, not insertion order. The read lock is held for the whole walk and
// released exactly once at the end; the callback should therefore not call
// put/remove/resize on the same table, since those take the write lock.
void hash_table_foreach(hash_table_t *ht,
                        void (*callback)(const char *key, void *value, void *userdata),
                        void *userdata) {
    if (!ht || !callback) {
        return;
    }

    pthread_rwlock_rdlock(&ht->rwlock);

    for (int i = 0; i < ht->size; i++) {
        for (hash_node_t *node = ht->buckets[i]; node; node = node->next) {
            callback(node->key, node->value, userdata);
        }
    }

    pthread_rwlock_unlock(&ht->rwlock);
}

// Return the load factor: stored pairs divided by bucket count.
//
// Returns 0 for a NULL table or a table with no buckets. The two counters
// are read under the read lock so a concurrent put/remove/resize cannot be
// observed half-way through.
float hash_table_load_factor(hash_table_t *ht) {
    if (!ht) {
        return 0;
    }

    pthread_rwlock_rdlock(&ht->rwlock);
    float factor = 0.0f;
    if (ht->size > 0) {
        factor = (float)ht->count / ht->size;
    }
    pthread_rwlock_unlock(&ht->rwlock);

    return factor;
}

// Length of the longest collision chain, a rough measure of how evenly the
// hash function spreads keys. Returns 0 for a NULL table.
int hash_table_max_chain_length(hash_table_t *ht) {
    if (!ht) {
        return 0;
    }

    int longest = 0;

    pthread_rwlock_rdlock(&ht->rwlock);

    for (int bucket = 0; bucket < ht->size; bucket++) {
        // Count the nodes hanging off this bucket.
        int chain = 0;
        for (hash_node_t *cur = ht->buckets[bucket]; cur; cur = cur->next) {
            chain++;
        }
        longest = (chain > longest) ? chain : longest;
    }

    pthread_rwlock_unlock(&ht->rwlock);

    return longest;
}

```

三、测试代码

```c

#include <stdio.h>

#include <pthread.h>

// foreach callback: print one entry as "key -> number".
// The value is read as an int; userdata is not used.
void print_entry(const char *key, void *value, void *userdata) {
    (void)userdata;
    const int *number = value;
    printf(" %s -> %d\n", key, *number);
}

// Heap-allocate an int holding `v`; returns NULL if the allocation fails.
static int *make_int(int v) {
    int *p = malloc(sizeof *p);
    if (p) {
        *p = v;
    }
    return p;
}

// Single-threaded walkthrough of the API: create, put, get, foreach,
// update, statistics, remove, destroy. Returns 0 on success, 1 if the
// table or the demo values cannot be allocated.
int main() {
    // Create the table
    hash_table_t *ht = hash_table_create(8);
    if (!ht) {
        fprintf(stderr, "哈希表创建失败\n");
        return 1;
    }
    printf("哈希表创建成功，桶数: %d\n", ht->size);

    // Insert
    printf("\n=== 插入数据 ===\n");
    int *v1 = make_int(100);
    int *v2 = make_int(200);
    int *v3 = make_int(300);
    if (!v1 || !v2 || !v3) {
        free(v1);
        free(v2);
        free(v3);
        hash_table_destroy(ht);
        return 1;
    }
    // On success the table owns the value; on failure it is still ours.
    if (hash_table_put(ht, "apple", v1) != 0) free(v1);
    if (hash_table_put(ht, "banana", v2) != 0) free(v2);
    if (hash_table_put(ht, "orange", v3) != 0) free(v3);
    printf("插入 3 个键值对\n");

    // Lookup
    printf("\n=== 查找数据 ===\n");
    int *found = hash_table_get(ht, "banana");
    printf("banana: %d\n", found ? *found : -1);
    found = hash_table_get(ht, "grape");
    printf("grape: %d\n", found ? *found : -1);

    // Traverse
    printf("\n=== 遍历 ===\n");
    hash_table_foreach(ht, print_entry, NULL);

    // Update
    printf("\n=== 更新 ===\n");
    int *v4 = make_int(999);
    if (v4 && hash_table_put(ht, "apple", v4) != 0) {
        free(v4);
    }
    // No free(v1) here: put releases the old value itself when it replaces
    // it, so freeing it again would be a double free.
    found = hash_table_get(ht, "apple");
    printf("apple 更新后: %d\n", found ? *found : -1);

    // Statistics
    printf("\n=== 统计 ===\n");
    printf("键值对数量: %d\n", ht->count);
    printf("负载因子: %.2f\n", hash_table_load_factor(ht));
    printf("最长链表长度: %d\n", hash_table_max_chain_length(ht));

    // Remove
    printf("\n=== 删除 ===\n");
    hash_table_remove(ht, "banana");
    printf("删除 banana 后\n");
    hash_table_foreach(ht, print_entry, NULL);

    // Clean up (also frees the remaining values)
    hash_table_destroy(ht);
    printf("\n哈希表已销毁\n");

    return 0;
}

```

运行结果：

```

哈希表创建成功，桶数: 8

=== 插入数据 ===

插入 3 个键值对

=== 查找数据 ===

banana: 200

grape: -1

=== 遍历 ===

 orange -> 300

 banana -> 200

 apple -> 100

=== 更新 ===

apple 更新后: 999

=== 统计 ===

键值对数量: 3

负载因子: 0.38

最长链表长度: 1

=== 删除 ===

删除 banana 后

 orange -> 300

 apple -> 999

哈希表已销毁

```

四、线程安全测试

```c

#include <pthread.h>

#include <unistd.h>

// Table shared by every worker thread; assigned in main before any thread
// is started.
hash_table_t *global_ht;

// Stop flag: written by main, polled by the workers. Declared _Atomic so the
// unsynchronised cross-thread read/write is well defined and not a data
// race; plain `!stop` and `stop = 1` keep working unchanged.
_Atomic int stop = 0;

// Writer thread: inserts up to 1000 keys named "key_<id>_<i>" into the
// shared table, stopping early once the stop flag is set.
//
// arg: pointer to this writer's int id. Always returns NULL.
void *writer_thread(void *arg) {
    int id = *(int *)arg;
    char key[32];

    for (int i = 0; i < 1000 && !stop; i++) {
        snprintf(key, sizeof(key), "key_%d_%d", id, i);

        int *value = malloc(sizeof *value);
        if (!value) {
            break; // out of memory: nothing useful left to insert
        }
        *value = i;

        // The table takes ownership only when put succeeds.
        if (hash_table_put(global_ht, key, value) != 0) {
            free(value);
        }

        if (i % 100 == 0) {
            usleep(1000); // yield the CPU now and then
        }
    }

    return NULL;
}

// Reader thread: performs up to 10000 lookups of keys "key_<id % 4>_<i % 1000>"
// against the shared table, stopping early once the stop flag is set.
//
// arg: pointer to this reader's int id. Always returns NULL.
void *reader_thread(void *arg) {
    const int writer_id = *(int *)arg % 4;
    char name[32];
    int round = 0;

    while (round < 10000 && !stop) {
        snprintf(name, sizeof name, "key_%d_%d", writer_id, round % 1000);

        // Lookup only; the result is deliberately discarded (printing it
        // would dominate the run time).
        (void)hash_table_get(global_ht, name);

        round++;
    }

    return NULL;
}

int main() {

global_ht = hash_table_create(64);

pthread_t writers[4], readers[8];

int ids[12];

// 启动写线程

for (int i = 0; i < 4; i++) {

ids[i] = i;

pthread_create(&writers[i], NULL, writer_thread, &ids[i]);

}

// 启动读线程

for (int i = 0; i < 8; i++) {

ids[4 + i] = i;

pthread_create(&readers[i], NULL, reader_thread, &ids[4 + i]);

}

sleep(5); // 运行5秒

stop = 1;

// 等待所有线程结束

for (int i = 0; i < 4; i++) {

pthread_join(writers[i], NULL);

}

for (int i = 0; i < 8; i++) {

pthread_join(readers[i], NULL);

}

printf("最终键值对数量: %d\n", global_ht->count);

printf("负载因子: %.2f\n", hash_table_load_factor(global_ht));

hash_table_destroy(global_ht);

return 0;

}

```

五、常见问题与优化

问题1：哈希冲突严重

原因：哈希函数不好或桶数太小

解决：

· 换用分布更均匀的哈希函数（本文用的 DJB2 简单够用，要求更高时可换成 MurmurHash、CityHash）

· 定期检查最长链表长度，超过阈值就扩容

问题2：内存泄漏

注意：

· put 更新已存在的 key 时，哈希表会 free 旧的 value，调用者不要再次释放（否则会 double free）

· remove 和 destroy 会释放value

· 建议统一约定：value由哈希表管理生命周期

问题3：读写锁性能

读写锁适合读多写少的场景。如果写操作频繁，可以：

· 用细粒度锁（每个桶一个锁）

· 用无锁哈希表（CAS操作）

优化方向

| 优化 | 说明 |
| --- | --- |
| 更好的哈希函数 | MurmurHash、CityHash |
| 红黑树替代链表 | Java8的HashMap做法，链表长度>8时转红黑树 |
| 缓存对齐 | 避免伪共享 |
| 内存池 | 预先分配节点，减少malloc开销 |

六、总结

通过这篇文章，你学会了：

· 哈希表的底层原理（数组+链表）

· 完整的线程安全哈希表实现

· 动态扩容机制

· 读写锁的使用

· 测试和性能分析

这个哈希表代码可以直接用到你的项目里。把它改成泛型版本，加上更多优化，就是一个生产级的组件了。

下一篇预告：《手写一个LRU缓存淘汰算法》

评论区分享一下你用哈希表解决过什么实际问题～