一、LRU缓存 - 技术栈

LRU缓存

1.LRU缓存介绍
2.LRU缓存实现
3.LRU缓存总结
- [3.1 LRU 缓存的应用](#3.1 LRU 缓存的应用)
- [3.2 LRU 缓存的优缺点](#3.2 LRU 缓存的优缺点)

1.LRU缓存介绍

LRU是Least Recently Used 的缩写，意为"最近最少使用"。它是一种常见的缓存淘汰策略，用于在缓存容量有限时，决定哪些数据需要被删除以腾出空间。

LRU 缓存的基本原则是：

①优先保留最近被访问的数据，因为这些数据在近期被再次访问的概率更高。

②淘汰最近最少使用的数据，因为它们被再次访问的可能性较小。

2.LRU缓存实现

接下来我将通过c语言中的glib库来实现一个LRU缓存结构。

首先给出结构体定义：

c 复制代码

struct lruCache {
    GList *elem_queue;  // 使用 GList 作为双向链表存储缓存元素。

    int max_size;       // 缓存最大容量，<0 表示无限大小（INFI_Cache）。
    int size;           // 当前缓存已使用的大小。

    double hit_count;   // 命中次数统计。
    double miss_count;  // 未命中次数统计。

    void (*free_elem)(void *);                  // 用户定义的释放元素函数。
    int (*hit_elem)(void* elem, void* user_data); // 判断命中元素的回调函数。
};

需要实现如下功能：

c 复制代码

struct lruCache* new_lru_cache(int size, void (*free_elem)(void *),
		int (*hit_elem)(void* elem, void* user_data));
// 创建一个新的 LRU 缓存，指定容量和自定义的释放与命中回调函数。

void free_lru_cache(struct lruCache*);
// 释放缓存及其中的所有元素。

void* lru_cache_lookup(struct lruCache*, void* user_data);
// 查找元素，若命中，将其移到链表头部；未命中返回 NULL。

void* lru_cache_lookup_without_update(struct lruCache* c, void* user_data);
// 查找元素但不更新其在链表中的顺序。

void* lru_cache_hits(struct lruCache* c, void* user_data,
		int (*hit)(void* elem, void* user_data));
// 模拟命中某个元素，满足自定义命中条件后将元素移到链表头部。

void lru_cache_kicks(struct lruCache* c, void* user_data,
		int (*func)(void* elem, void* user_data));
// 删除满足用户自定义条件的元素。

void lru_cache_insert(struct lruCache *c, void* data,
		void (*victim)(void*, void*), void* user_data);
// 插入新数据到缓存，若缓存已满则淘汰最久未使用的元素，并调用 victim 函数处理被淘汰的数据。

int lru_cache_is_full(struct lruCache*);
// 检查缓存是否已满，已满返回 1，未满返回 0。

具体实现代码：

c 复制代码

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "lru_cache.h"

struct lruCache* new_lru_cache(int size, void (*free_elem)(void *),
		int (*hit_elem)(void* elem, void* user_data)) {
	struct lruCache* c = (struct lruCache*) malloc(sizeof(struct lruCache));

	c->elem_queue = NULL;

	c->max_size = size;
	c->size = 0;
	c->hit_count = 0;
	c->miss_count = 0;

	c->free_elem = free_elem;
	c->hit_elem = hit_elem;

	return c;
}

void free_lru_cache(struct lruCache *c) {
    if (c == NULL) return;  // 防止对 NULL 指针调用

    if (c->elem_queue != NULL) {
        // 确保对每个元素调用释放函数
        g_list_free_full(c->elem_queue, c->free_elem);
        c->elem_queue = NULL;  // 清空队列，防止重复释放
    }

    // 清理 lruCache 结构体本身
    free(c);
}

/* find a item in cache matching the condition */
void* lru_cache_lookup(struct lruCache* c, void* user_data) {
    // 获取链表的第一个节点（链表头部）
    GList* elem = g_list_first(c->elem_queue);

    // 遍历链表，查找匹配的元素
    while (elem) {
        /*
         * 使用回调函数 hit_elem 判断当前节点的数据是否与 user_data 匹配。
         * 回调函数由用户提供，自定义匹配逻辑。
         */
        if (c->hit_elem(elem->data, user_data))
            break;  // 找到匹配的元素，退出循环

        // 继续遍历下一个节点
        elem = g_list_next(elem);
    }

    // 如果找到匹配的元素
    if (elem) {
        /*
         * 将命中的元素移到链表头部，保持 LRU 缓存的访问顺序。
         * 1. 先从链表中移除该元素。
         * 2. 将该元素连接到链表头部。
         */
        c->elem_queue = g_list_remove_link(c->elem_queue, elem);
        c->elem_queue = g_list_concat(elem, c->elem_queue);

        // 增加缓存命中计数
        c->hit_count++;

        // 返回命中元素的数据
        return elem->data;
    } else {
        // 如果未找到匹配的元素，增加缓存未命中计数
        c->miss_count++;
        return NULL;  // 返回 NULL 表示未命中
    }
}

void* lru_cache_lookup_without_update(struct lruCache* c, void* user_data) {
	GList* elem = g_list_first(c->elem_queue);
	while (elem) {
		if (c->hit_elem(elem->data, user_data))
			break;
		elem = g_list_next(elem);
	}
	if (elem) {
		return elem->data;
	} else {
		return NULL;
	}
}

/*
 * Hit an existing elem for simulating an insertion of it.
 */
void* lru_cache_hits(struct lruCache* c, void* user_data,
		int (*hit)(void* elem, void* user_data)) {
	GList* elem = g_list_first(c->elem_queue);
	while (elem) {
		if (hit(elem->data, user_data))
			break;
		elem = g_list_next(elem);
	}
	if (elem) {
		c->elem_queue = g_list_remove_link(c->elem_queue, elem);
		c->elem_queue = g_list_concat(elem, c->elem_queue);
		return elem->data;
	} else {
		return NULL;
	}
}

/*
 * We know that the data does not exist!
 */
void lru_cache_insert(struct lruCache *c, void* data,
                      void (*func)(void*, void*), void* user_data) {
    void *victim = NULL; // 存储被淘汰的数据

    // 检查缓存是否已满
    if (c->max_size > 0 && c->size == c->max_size) {
        // 获取链表尾部的节点（最久未使用的数据）
        GList *last = g_list_last(c->elem_queue);

        // 从链表中移除尾部节点
        c->elem_queue = g_list_remove_link(c->elem_queue, last);

        // 保存被淘汰的数据
        victim = last->data;

        // 释放链表节点（但不释放节点内的数据）
        g_list_free_1(last);

        // 更新缓存大小
        c->size--;
    }

    // 将新数据插入到链表头部（表示最近使用）
    c->elem_queue = g_list_prepend(c->elem_queue, data);

    // 更新缓存大小
    c->size++;

    // 如果有被淘汰的数据
    if (victim) {
        // 调用用户自定义回调函数处理被淘汰的数据（如果提供了 func）
        if (func)
            func(victim, user_data);

        // 调用 free_elem 回调释放被淘汰的数据
        c->free_elem(victim);
    }
}

/*
 * 从缓存中移除符合用户定义条件的元素。
 * 
 * 参数:
 *   c          - 指向 lruCache 结构体的指针。
 *   user_data  - 用户自定义的数据，用于传递给回调函数 func。
 *   func       - 用户自定义的回调函数，用于判断当前元素是否需要被移除。
 *                返回非 0（true）表示移除该元素，返回 0（false）继续遍历。
 */
void lru_cache_kicks(struct lruCache* c, void* user_data,
                     int (*func)(void* elem, void* user_data)) {
    // 从链表尾部开始遍历（最久未使用的数据）
    GList* elem = g_list_last(c->elem_queue);

    // 遍历链表，向前移动，查找符合条件的节点
    while (elem) {
        /*
         * 调用用户提供的回调函数 func，判断当前节点的数据是否符合移除条件。
         * 参数:
         *   elem->data  - 当前节点存储的数据。
         *   user_data   - 用户提供的上下文数据。
         */
        if (func(elem->data, user_data)) 
            break; // 如果找到符合条件的节点，退出循环

        elem = g_list_previous(elem); // 移动到前一个节点
    }

    // 如果找到了符合条件的节点
    if (elem) {
        /*
         * 1. 从链表中移除该节点（但不释放节点内存和数据）。
         *    g_list_remove_link 返回移除后的链表。
         */
        c->elem_queue = g_list_remove_link(c->elem_queue, elem);

        /*
         * 2. 释放节点中存储的数据。
         *    调用用户提供的 free_elem 函数，确保数据被正确释放，防止内存泄漏。
         */
        c->free_elem(elem->data);

        /*
         * 3. 释放链表节点本身的内存。
         *    注意：g_list_free_1 只释放 GList 结构，不释放节点数据。
         */
        g_list_free_1(elem);

        // 4. 更新缓存大小
        c->size--;
    }
}


int lru_cache_is_full(struct lruCache* c) {
	return c->size >= c->max_size ? 1 : 0;
}

简单测试代码：

c 复制代码

#include <stdio.h>
#include <stdlib.h>
#include "lru_cache.h"

// 自定义释放函数：释放节点数据
void free_data(void* data) {
    printf("Freeing data: %d\n", *(int*)data);
    free(data);
}

// 自定义匹配函数：判断数据是否匹配用户输入
int match_data(void* elem, void* user_data) {
    return (*(int*)elem == *(int*)user_data);
}

// 主函数：测试 LRU 缓存
int main() {
    printf("---- Testing LRU Cache ----\n");

    // 创建一个容量为 3 的 LRU 缓存
    struct lruCache* cache = new_lru_cache(3, free_data, match_data);

    // 插入测试数据
    int* a = malloc(sizeof(int)); *a = 1;
    int* b = malloc(sizeof(int)); *b = 2;
    int* c = malloc(sizeof(int)); *c = 3;
    int* d = malloc(sizeof(int)); *d = 4;

    printf("Inserting: 1, 2, 3\n");
    lru_cache_insert(cache, a, NULL, NULL);
    lru_cache_insert(cache, b, NULL, NULL);
    lru_cache_insert(cache, c, NULL, NULL);

    // 查找数据：命中情况
    int target = 2;
    int* result = lru_cache_lookup(cache, &target);
    if (result) {
        printf("Cache hit: %d\n", *result);
    } else {
        printf("Cache miss: %d\n", target);
    }

    // 插入数据 4，导致数据 1 被淘汰
    printf("Inserting: 4 (This should evict 1)\n");
    lru_cache_insert(cache, d, NULL, NULL);

    // 再次查找数据 1：应该未命中
    target = 1;
    result = lru_cache_lookup(cache, &target);
    if (result) {
        printf("Cache hit: %d\n", *result);
    } else {
        printf("Cache miss: %d\n", target);
    }

    // 查找数据 3：应该命中
    target = 3;
    result = lru_cache_lookup(cache, &target);
    if (result) {
        printf("Cache hit: %d\n", *result);
    } else {
        printf("Cache miss: %d\n", target);
    }

    // 释放缓存
    free_lru_cache(cache);

    printf("---- LRU Cache Test Complete ----\n");
    return 0;
}

测试结果：

这是一个非常简单的测试代码，其实上面的LRU缓存实现是和数据类型无关的，因为我们通过函数指针提供了对数据的操作抽象，例如：

free_elem 回调：

允许用户自定义如何释放节点中的数据，可以适配不同类型的内存管理。

hit_elem 回调：

允许用户自定义数据匹配逻辑，适配任意类型的比较需求。

这些设计使得我们的缓存可以支持任意类型的数据，而不仅仅是局限在int类型。

下面我们来一个自定义数据类型的测试代码：

c 复制代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lru_cache.h"

// 自定义数据类型：Person
typedef struct {
    char* name;
    int age;
} Person;

// 自定义释放函数：释放 Person 类型的数据
void free_person(void* data) {
    Person* person = (Person*)data;
    printf("[Free] Name = %s, Age = %d\n", person->name, person->age);
    free(person->name); // 释放 name 字符串
    free(person);       // 释放 Person 结构体
}

// 自定义匹配函数：根据姓名匹配 Person
int match_person(void* elem, void* user_data) {
    Person* person = (Person*)elem;
    char* target_name = (char*)user_data;
    return strcmp(person->name, target_name) == 0;
}

// 工具函数：创建 Person 对象
Person* create_person(const char* name, int age) {
    Person* person = malloc(sizeof(Person));
    person->name = strdup(name); // 分配并复制 name
    person->age = age;
    return person;
}

// 测试函数：打印缓存的命中率统计信息
void print_cache_stats(struct lruCache* cache) {
    printf("Cache Stats: Hits = %.0f, Misses = %.0f, Hit Rate = %.2f%%\n",
           cache->hit_count,
           cache->miss_count,
           cache->hit_count / (cache->hit_count + cache->miss_count) * 100.0);
}

int main() {
    printf("---- Comprehensive LRU Cache Test ----\n");

    // 创建容量为 3 的 LRU 缓存
    struct lruCache* cache = new_lru_cache(3, free_person, match_person);

    // 插入数据：Person 结构体
    printf("Inserting: Alice, Bob, Charlie\n");
    lru_cache_insert(cache, create_person("Alice", 25), NULL, NULL);
    lru_cache_insert(cache, create_person("Bob", 30), NULL, NULL);
    lru_cache_insert(cache, create_person("Charlie", 35), NULL, NULL);

    // 查找数据：命中 Bob
    printf("Looking up: Bob\n");
    char* target_name = "Bob";
    Person* result = (Person*)lru_cache_lookup(cache, target_name);
    if (result) {
        printf("[Hit] Found: Name = %s, Age = %d\n", result->name, result->age);
    } else {
        printf("[Miss] Not found: %s\n", target_name);
    }

    // 插入新数据 Dave，触发淘汰最久未使用的数据 Alice
    printf("Inserting: Dave (Evicts Alice)\n");
    lru_cache_insert(cache, create_person("Dave", 40), NULL, NULL);

    // 查找 Alice：未命中
    printf("Looking up: Alice\n");
    target_name = "Alice";
    result = (Person*)lru_cache_lookup(cache, target_name);
    if (result) {
        printf("[Hit] Found: Name = %s, Age = %d\n", result->name, result->age);
    } else {
        printf("[Miss] Not found: %s\n", target_name);
    }

    // 查找 Charlie：命中
    printf("Looking up: Charlie\n");
    target_name = "Charlie";
    result = (Person*)lru_cache_lookup(cache, target_name);
    if (result) {
        printf("[Hit] Found: Name = %s, Age = %d\n", result->name, result->age);
    } else {
        printf("[Miss] Not found: %s\n", target_name);
    }

    // 打印缓存的命中率统计信息
    print_cache_stats(cache);

    // 释放缓存
    printf("Freeing the cache...\n");
    free_lru_cache(cache);

    printf("---- LRU Cache Test Complete ----\n");
    return 0;
}

测试结果：

上面的LRU缓存代码其实是一个设计精巧、功能全面的缓存实现，具有高度的通用性和灵活性。通过将缓存管理逻辑与数据操作解耦，提供了标准化的接口，包括元素插入、查找、淘汰、命中统计等功能，同时通过回调函数支持任意数据类型的自定义释放和匹配逻辑。

核心优势总结：

①模块化设计：通过free_elem和hit_elem回调函数，适配不同数据类型，用户无需修改核心代码即可实现各种缓存需求。

②功能丰富：支持 LRU 淘汰策略，自动移除最久未使用的数据。提供查找、插入、条件删除、命中模拟等多种操作接口。统计命中次数和未命中次数，便于分析缓存性能。

③内存管理安全：使用 free_elem 回调释放数据，确保内存不会泄漏。

④易于扩展：代码逻辑清晰，接口简单易用，方便进一步添加功能，如并发支持、过期数据清理等

3.LRU缓存总结

接下来我将通过两个表格来简要描述一下LRU缓存的应用以及优缺点。

3.1 LRU 缓存的应用

应用场景	描述	作用
操作系统内存管理	用于页面置换机制，替换最久未使用的内存页。	提高内存利用率，减少磁盘 I/O。
数据库缓存	缓存频繁访问的数据，淘汰不常用的数据。	提高查询性能，减少查询延迟。
Web 浏览器缓存	缓存网页资源（如 HTML、图片、CSS 等），加快访问速度。	减少重复下载，提升用户体验。
CDN 内容分发网络	缓存热点内容，替换最少访问的资源。	减少带宽消耗，加速内容传输。
嵌入式系统	管理资源受限设备中的数据缓存。	提高执行效率，优化内存占用。
数据流处理与缓存	临时缓存大数据处理中间结果，腾出空间以继续处理新的数据。	提升处理速度，避免重复计算。

3.2 LRU 缓存的优缺点

类别	描述
优点	1. 实现简单：逻辑清晰，易于实现。
	2. 适应时间局部性：能很好处理最近访问的数据，提高缓存命中率。
	3. 广泛适用：适用于多种缓存管理场景，如内存、数据库、浏览器等。
缺点	1. 空间开销大：需额外使用链表和哈希表维护数据顺序，增加内存消耗。
	2. 性能瓶颈：若未优化，查找和移动数据时间复杂度较高（O(N)）。
	3. 非最佳策略：在数据访问均匀分布或随机的情况下，命中率较低，效果不佳。
	4. 线程安全问题：在多线程环境下，需要额外加锁保护，影响性能。

总结：LRU 缓存在操作系统、数据库、Web 浏览器等场景中具有广泛应用，优点是实现简单、适应时间局部性，但也存在空间开销和性能瓶颈等缺点。