头文件
cpp
#include <stdint.h>
#include <stddef.h>
// Bloom filter state: the bit array plus the parameters that describe it.
typedef struct BloomFilter {
    uint8_t *bit_array;     // storage for the filter's bits
    size_t bit_array_size;  // length of bit_array, in bytes
    size_t num_bits;        // total number of addressable bits
    int num_hash_funcs;     // number of hash probes per element
    size_t element_count;   // number of elements inserted so far
} BloomFilter;
// Public API
// Allocates a filter with num_bits bits, all cleared, that probes
// num_hash_funcs positions per element. Returns NULL if num_bits is 0,
// num_hash_funcs is not positive, or allocation fails. Release the result
// with bloom_filter_destroy().
BloomFilter* bloom_filter_create(size_t num_bits, int num_hash_funcs);
// Frees the filter and its bit array. Passing NULL is a no-op.
void bloom_filter_destroy(BloomFilter* filter);
// Inserts the NUL-terminated string element. Returns 1 on success, or 0 if
// filter or element is NULL or element is the empty string.
int bloom_filter_add(BloomFilter* filter, const char* element);
// Membership test. Returns 0 when element is definitely not in the filter
// (also for NULL arguments or an empty string) and 1 when it may be present;
// false positives are possible.
int bloom_filter_contains(const BloomFilter* filter, const char* element);
// Returns the number of successful bloom_filter_add() calls since creation
// or the last bloom_filter_clear(); returns 0 if filter is NULL.
size_t bloom_filter_size(const BloomFilter* filter);
// Resets every bit and the insertion count to zero. Does nothing if filter
// or its bit array is NULL.
void bloom_filter_clear(BloomFilter* filter);
// Returns the theoretical false-positive rate (1 - e^(-k*n/m))^k for the
// current insertion count; 0.0 if filter is NULL or nothing was inserted.
double bloom_filter_false_positive_rate(const BloomFilter* filter);
源文件
cpp
#include "布隆过滤器.h"
#include <stdlib.h>
#include <string.h>
#include <math.h>
// Hash function 1: DJB2 (hash = hash * 33 + byte, seeded with 5381).
// Each byte is read as unsigned char so that bytes >= 0x80 (e.g. UTF-8 text)
// produce the same hash whether plain char is signed or unsigned on the
// target platform.
static uint32_t hash1(const char* str) {
    const unsigned char* p = (const unsigned char*)str;
    uint32_t hash = 5381;
    while (*p != '\0') {
        hash = ((hash << 5) + hash) + *p;  // hash * 33 + byte
        p++;
    }
    return hash;
}
// Hash function 2: 32-bit FNV-1a. Starting from the offset basis, each byte
// is XORed into the state, which is then multiplied by the FNV prime.
static uint32_t hash2(const char* str) {
    uint32_t state = 2166136261u;  // FNV offset basis
    for (const char* cursor = str; *cursor != '\0'; ++cursor) {
        state = (state ^ (uint8_t)(*cursor)) * 16777619u;  // FNV prime
    }
    return state;
}
// Returns the i-th probe position for str using double hashing:
// (hash1(str) + i * hash2(str)) reduced modulo filter->num_bits.
// Both hashes are recomputed on every call. filter->num_bits must be
// non-zero, otherwise the modulo is undefined.
static uint32_t get_hash(const BloomFilter* filter, const char* str, int i) {
    const uint32_t base = hash1(str);
    const uint32_t step = hash2(str);
    return (base + i * step) % filter->num_bits;
}
// Creates a Bloom filter with num_bits bits (all initially zero) that uses
// num_hash_funcs probes per element.
// Returns NULL when num_bits is 0, num_hash_funcs is not positive, or memory
// cannot be allocated. The caller owns the result and releases it with
// bloom_filter_destroy().
BloomFilter* bloom_filter_create(size_t num_bits, int num_hash_funcs) {
    if (num_bits == 0 || num_hash_funcs <= 0) {
        return NULL;
    }
    // Round up to whole bytes. Written as quotient plus a remainder test
    // because (num_bits + 7) / 8 wraps to 0 when num_bits is within 7 of
    // SIZE_MAX, which would produce a zero-byte array.
    size_t byte_size = num_bits / 8 + ((num_bits % 8 != 0) ? 1 : 0);
    BloomFilter* filter = (BloomFilter*)malloc(sizeof *filter);
    if (!filter) {
        return NULL;
    }
    // calloc zero-fills, so the filter starts out empty.
    filter->bit_array = (uint8_t*)calloc(byte_size, sizeof *filter->bit_array);
    if (!filter->bit_array) {
        free(filter);
        return NULL;
    }
    filter->bit_array_size = byte_size;
    filter->num_bits = num_bits;
    filter->num_hash_funcs = num_hash_funcs;
    filter->element_count = 0;
    return filter;
}
// Releases the bit array and then the filter itself.
// A NULL filter is accepted and ignored.
void bloom_filter_destroy(BloomFilter* filter) {
    if (filter == NULL) {
        return;
    }
    free(filter->bit_array);  // free(NULL) is a no-op, so no guard is needed
    free(filter);
}
// Inserts element by setting the bit at each of its num_hash_funcs probe
// positions, then increments the insertion counter.
// Returns 1 on success. Returns 0, leaving the filter untouched, when filter
// or element is NULL or element is the empty string.
int bloom_filter_add(BloomFilter* filter, const char* element) {
    if (filter == NULL || element == NULL || element[0] == '\0') {
        return 0;
    }
    const int probes = filter->num_hash_funcs;
    int i = 0;
    while (i < probes) {
        const uint32_t pos = get_hash(filter, element, i);
        uint8_t* slot = &filter->bit_array[pos >> 3];  // byte holding the bit
        *slot |= (1 << (pos & 7));                     // bit within that byte
        i++;
    }
    filter->element_count += 1;
    return 1;
}
// Tests whether element may be in the filter by checking the bit at each of
// its num_hash_funcs probe positions.
// Returns 0 as soon as one probed bit is clear (the element is definitely
// absent), and also when filter or element is NULL or element is the empty
// string. Returns 1 when every probed bit is set; this can be a false
// positive.
int bloom_filter_contains(const BloomFilter* filter, const char* element) {
    if (filter == NULL || element == NULL || *element == '\0') {
        return 0;
    }
    for (int probe = 0; probe < filter->num_hash_funcs; ++probe) {
        const uint32_t pos = get_hash(filter, element, probe);
        const uint8_t byte = filter->bit_array[pos / 8];
        if ((byte & (1 << (pos % 8))) == 0) {
            return 0;  // a clear bit means the element is definitely absent
        }
    }
    return 1;  // all bits set: possibly present
}
// Returns the filter's insertion counter, or 0 when filter is NULL.
size_t bloom_filter_size(const BloomFilter* filter) {
    if (filter == NULL) {
        return 0;
    }
    return filter->element_count;
}
// Empties the filter: zeroes the whole bit array and resets the insertion
// counter. Does nothing when filter or its bit array is NULL.
void bloom_filter_clear(BloomFilter* filter) {
    if (filter == NULL || filter->bit_array == NULL) {
        return;
    }
    memset(filter->bit_array, 0, filter->bit_array_size);
    filter->element_count = 0;
}
// Returns the theoretical false-positive rate (1 - e^(-k * n / m))^k, where
// k is the number of hash functions, n the insertion count and m the number
// of bits. Returns 0.0 when filter is NULL or nothing has been inserted.
double bloom_filter_false_positive_rate(const BloomFilter* filter) {
    if (!filter || filter->element_count == 0) {
        return 0.0;
    }
    const double k = (double)filter->num_hash_funcs;
    const double m = (double)filter->num_bits;
    const double n = (double)filter->element_count;
    // 1 - e^x is evaluated as -expm1(x): the direct subtraction cancels
    // catastrophically when k * n / m is tiny (a sparsely filled filter) and
    // would round the estimate toward 0.
    const double fill_ratio = -expm1(-k * n / m);
    return pow(fill_ratio, k);
}