算法导论第三章：数据结构艺术与高效实现

本文是《算法导论》精讲专栏第三章，通过动态操作可视化 、内存布局图解 和性能对比实验，结合完整C语言实现，深入解析数据结构核心原理。包含栈、队列、链表、散列表、二叉树等完整实现，以及工程实践中的优化技巧。

1. 动态集合：算法操作的基石

1.1 基本操作与数据结构选择

动态集合查询操作修改操作 Search Minimum Successor Insert Delete Update

不同数据结构操作复杂度对比：

数据结构	Search	Insert	Delete	Minimum	Successor
无序数组	O(n)	O(1)	O(n)	O(n)	O(n)
有序数组	O(log n)	O(n)	O(n)	O(1)	O(1)
链表	O(n)	O(1)	O(1)*	O(n)	O(1)
二叉搜索树	O(h)	O(h)	O(h)	O(h)	O(h)
散列表	O(1)	O(1)	O(1)	O(n)	O(n)

*注：链表删除操作在已知节点位置时为O(1)

2. 栈与队列：线性结构的经典应用

2.1 栈：LIFO（后进先出）结构

操作原理：

复制代码

      push -> |   |   |   | <- pop
             +---+---+---+
             | 3 | 2 | 1 |
             +---+---+---+
             栈顶↑     栈底

C语言实现：

c 复制代码

#define MAX_SIZE 100

typedef struct {
    int data[MAX_SIZE];
    int top;
} Stack;

void init_stack(Stack *s) {
    s->top = -1;
}

int is_empty(Stack *s) {
    return s->top == -1;
}

int is_full(Stack *s) {
    return s->top == MAX_SIZE - 1;
}

void push(Stack *s, int item) {
    if (is_full(s)) {
        printf("Stack Overflow\n");
        return;
    }
    s->data[++s->top] = item;
}

int pop(Stack *s) {
    if (is_empty(s)) {
        printf("Stack Underflow\n");
        return -1;
    }
    return s->data[s->top--];
}

// 应用：括号匹配检查
int is_balanced(char *expr) {
    Stack s;
    init_stack(&s);
    
    for (int i = 0; expr[i]; i++) {
        if (expr[i] == '(' || expr[i] == '[' || expr[i] == '{') {
            push(&s, expr[i]);
        } else if (expr[i] == ')' || expr[i] == ']' || expr[i] == '}') {
            if (is_empty(&s)) return 0;
            
            char top = pop(&s);
            if ((expr[i] == ')' && top != '(') ||
                (expr[i] == ']' && top != '[') ||
                (expr[i] == '}' && top != '{')) {
                return 0;
            }
        }
    }
    return is_empty(&s);
}

2.2 队列：FIFO（先进先出）结构

循环队列实现：

c 复制代码

typedef struct {
    int data[MAX_SIZE];
    int front;
    int rear;
    int size;
} Queue;

void init_queue(Queue *q) {
    q->front = 0;
    q->rear = -1;
    q->size = 0;
}

int is_empty(Queue *q) {
    return q->size == 0;
}

int is_full(Queue *q) {
    return q->size == MAX_SIZE;
}

void enqueue(Queue *q, int item) {
    if (is_full(q)) {
        printf("Queue Overflow\n");
        return;
    }
    q->rear = (q->rear + 1) % MAX_SIZE;
    q->data[q->rear] = item;
    q->size++;
}

int dequeue(Queue *q) {
    if (is_empty(q)) {
        printf("Queue Underflow\n");
        return -1;
    }
    int item = q->data[q->front];
    q->front = (q->front + 1) % MAX_SIZE;
    q->size--;
    return item;
}

// 应用：BFS广度优先搜索
void bfs(int graph[][5], int start, int n) {
    int visited[5] = {0};
    Queue q;
    init_queue(&q);
    
    visited[start] = 1;
    enqueue(&q, start);
    
    while (!is_empty(&q)) {
        int node = dequeue(&q);
        printf("%d ", node);
        
        for (int i = 0; i < n; i++) {
            if (graph[node][i] && !visited[i]) {
                visited[i] = 1;
                enqueue(&q, i);
            }
        }
    }
}

3. 链表：动态内存的优雅组织

3.1 链表类型与性能对比

类型	优点	缺点	应用场景
单向链表	插入/删除快，内存小	只能单向遍历	简单列表，LRU缓存
双向链表	双向遍历，删除高效	内存占用大	浏览器历史记录
循环链表	循环访问，无边界	实现复杂	轮询调度，约瑟夫问题
跳跃链表	快速查找(O(log n))	实现复杂，维护成本高	Redis有序集合

3.2 双向链表完整实现

c 复制代码

typedef struct Node {
    int data;
    struct Node *prev;
    struct Node *next;
} Node;

typedef struct {
    Node *head;
    Node *tail;
    int size;
} DoublyLinkedList;

Node *create_node(int data) {
    Node *new_node = (Node *)malloc(sizeof(Node));
    new_node->data = data;
    new_node->prev = NULL;
    new_node->next = NULL;
    return new_node;
}

void init_list(DoublyLinkedList *list) {
    list->head = NULL;
    list->tail = NULL;
    list->size = 0;
}

void insert_front(DoublyLinkedList *list, int data) {
    Node *new_node = create_node(data);
    
    if (list->head == NULL) {
        list->head = list->tail = new_node;
    } else {
        new_node->next = list->head;
        list->head->prev = new_node;
        list->head = new_node;
    }
    list->size++;
}

void insert_end(DoublyLinkedList *list, int data) {
    Node *new_node = create_node(data);
    
    if (list->tail == NULL) {
        list->head = list->tail = new_node;
    } else {
        list->tail->next = new_node;
        new_node->prev = list->tail;
        list->tail = new_node;
    }
    list->size++;
}

void delete_node(DoublyLinkedList *list, Node *node) {
    if (node == NULL) return;
    
    if (node == list->head) {
        list->head = node->next;
        if (list->head) list->head->prev = NULL;
    }
    
    if (node == list->tail) {
        list->tail = node->prev;
        if (list->tail) list->tail->next = NULL;
    }
    
    if (node->prev) node->prev->next = node->next;
    if (node->next) node->next->prev = node->prev;
    
    free(node);
    list->size--;
}

// 应用：LRU缓存实现
typedef struct {
    int capacity;
    DoublyLinkedList list;
    Node **hash_table; // 简化版哈希表
} LRUCache;

LRUCache *create_cache(int capacity) {
    LRUCache *cache = (LRUCache *)malloc(sizeof(LRUCache));
    cache->capacity = capacity;
    init_list(&cache->list);
    cache->hash_table = (Node **)calloc(1000, sizeof(Node *));
    return cache;
}

int get(LRUCache *cache, int key) {
    if (cache->hash_table[key] == NULL) return -1;
    
    // 移动到链表头部
    Node *node = cache->hash_table[key];
    delete_node(&cache->list, node);
    insert_front(&cache->list, key);
    cache->hash_table[key] = cache->list.head;
    
    return node->data;
}

void put(LRUCache *cache, int key, int value) {
    if (cache->hash_table[key] != NULL) {
        // 更新现有值
        Node *node = cache->hash_table[key];
        node->data = value;
        get(cache, key); // 触发访问更新
        return;
    }
    
    if (cache->list.size >= cache->capacity) {
        // 淘汰最久未使用
        int key_to_remove = cache->list.tail->data;
        delete_node(&cache->list, cache->list.tail);
        cache->hash_table[key_to_remove] = NULL;
    }
    
    // 插入新节点
    insert_front(&cache->list, key);
    cache->list.head->data = value;
    cache->hash_table[key] = cache->list.head;
}

4. 内存中的指针与对象

4.1 三种内存表示方式

数组实现图示：

复制代码

索引:  0     1     2     3     4     5
     +-----+-----+-----+-----+-----+-----+
key: |  10 |  20 |  30 |     |     |     |
next:|  2  |  3  | -1  |     |     |     |
prev:| -1  |  0  |  1  |     |     |     |
     +-----+-----+-----+-----+-----+-----+
     头节点↑

C语言实现：

c 复制代码

#define MAX_SIZE 100

typedef struct {
    int key;
    int next;
    int prev;
} ListNode;

typedef struct {
    ListNode nodes[MAX_SIZE];
    int free_list;
    int head;
    int size;
} ArrayLinkedList;

void init_array_list(ArrayLinkedList *list) {
    // 初始化空闲列表
    for (int i = 0; i < MAX_SIZE - 1; i++) {
        list->nodes[i].next = i + 1;
    }
    list->nodes[MAX_SIZE - 1].next = -1;
    list->free_list = 0;
    list->head = -1;
    list->size = 0;
}

int allocate_object(ArrayLinkedList *list) {
    if (list->free_list == -1) {
        printf("Out of space\n");
        return -1;
    }
    int index = list->free_list;
    list->free_list = list->nodes[index].next;
    return index;
}

void free_object(ArrayLinkedList *list, int index) {
    list->nodes[index].next = list->free_list;
    list->free_list = index;
}

void insert_sorted(ArrayLinkedList *list, int key) {
    int new_index = allocate_object(list);
    if (new_index == -1) return;
    
    list->nodes[new_index].key = key;
    list->nodes[new_index].next = -1;
    list->nodes[new_index].prev = -1;
    
    // 查找插入位置
    int curr = list->head;
    int prev = -1;
    
    while (curr != -1 && list->nodes[curr].key < key) {
        prev = curr;
        curr = list->nodes[curr].next;
    }
    
    if (prev == -1) {
        // 插入头部
        list->nodes[new_index].next = list->head;
        if (list->head != -1) {
            list->nodes[list->head].prev = new_index;
        }
        list->head = new_index;
    } else {
        // 插入中间
        list->nodes[new_index].next = curr;
        list->nodes[new_index].prev = prev;
        list->nodes[prev].next = new_index;
        if (curr != -1) {
            list->nodes[curr].prev = new_index;
        }
    }
    list->size++;
}

5. 树结构：层次化数据组织

5.1 二叉树表示法

三种表示法对比：

表示法	存储结构	优点	缺点
左右链表示法	节点存储左右子节点指针	结构灵活，操作高效	内存占用大
数组表示法	按层级顺序存储在数组中	内存紧凑，访问快速	不适合动态变化
父亲链表示法	节点存储父节点指针	适合找父节点操作	查找子节点效率低

C语言实现（左右链法）：

c 复制代码

typedef struct TreeNode {
    int data;
    struct TreeNode *left;
    struct TreeNode *right;
} TreeNode;

TreeNode *create_node(int data) {
    TreeNode *node = (TreeNode *)malloc(sizeof(TreeNode));
    node->data = data;
    node->left = NULL;
    node->right = NULL;
    return node;
}

// 二叉搜索树插入
TreeNode *insert(TreeNode *root, int data) {
    if (root == NULL) {
        return create_node(data);
    }
    
    if (data < root->data) {
        root->left = insert(root->left, data);
    } else if (data > root->data) {
        root->right = insert(root->right, data);
    }
    
    return root;
}

// 三种遍历方式
void preorder(TreeNode *root) {
    if (root) {
        printf("%d ", root->data);
        preorder(root->left);
        preorder(root->right);
    }
}

void inorder(TreeNode *root) {
    if (root) {
        inorder(root->left);
        printf("%d ", root->data);
        inorder(root->right);
    }
}

void postorder(TreeNode *root) {
    if (root) {
        postorder(root->left);
        postorder(root->right);
        printf("%d ", root->data);
    }
}

5.2 树的遍历应用

表达式树求值：

复制代码

c 复制代码

int evaluate_expression(TreeNode *root) {
    if (root == NULL) return 0;
    
    // 叶子节点为操作数
    if (!root->left && !root->right) {
        return root->data;
    }
    
    int left_val = evaluate_expression(root->left);
    int right_val = evaluate_expression(root->right);
    
    switch (root->data) {
        case '+': return left_val + right_val;
        case '-': return left_val - right_val;
        case '*': return left_val * right_val;
        case '/': return left_val / right_val;
    }
    return 0;
}

6. 散列表：快速访问的魔法

6.1 散列函数设计原则

均匀性：键值均匀分布到各个槽
高效性：计算速度快
确定性：相同键产生相同散列值

常见散列函数：

c 复制代码

// 除法散列法：h(k) = k mod m
int division_hash(int key, int m) {
    return key % m;
}

// 乘法散列法：h(k) = floor(m * (k * A mod 1))
int multiplication_hash(int key, int m) {
    double A = 0.6180339887; // 黄金分割
    double val = key * A;
    return (int)(m * (val - (int)val));
}

// 全域散列法（避免最坏情况）
int universal_hash(int key, int a, int b, int p, int m) {
    return ((a * key + b) % p) % m;
}

6.2 冲突解决策略

6.2.1 链地址法

索引0 键1 键2 索引1 键3 索引2 键4 键5

C语言实现：

c 复制代码

#define TABLE_SIZE 10

typedef struct HashNode {
    int key;
    int value;
    struct HashNode *next;
} HashNode;

typedef struct {
    HashNode **buckets;
    int size;
} HashMap;

HashMap *create_hash_map() {
    HashMap *map = (HashMap *)malloc(sizeof(HashMap));
    map->size = TABLE_SIZE;
    map->buckets = (HashNode **)calloc(TABLE_SIZE, sizeof(HashNode *));
    return map;
}

int hash(int key) {
    return key % TABLE_SIZE;
}

void put(HashMap *map, int key, int value) {
    int index = hash(key);
    HashNode *new_node = (HashNode *)malloc(sizeof(HashNode));
    new_node->key = key;
    new_node->value = value;
    new_node->next = NULL;
    
    if (map->buckets[index] == NULL) {
        map->buckets[index] = new_node;
    } else {
        HashNode *current = map->buckets[index];
        while (current->next != NULL) {
            if (current->key == key) {
                current->value = value; // 更新现有键
                free(new_node);
                return;
            }
            current = current->next;
        }
        current->next = new_node;
    }
}

int get(HashMap *map, int key) {
    int index = hash(key);
    HashNode *current = map->buckets[index];
    
    while (current != NULL) {
        if (current->key == key) {
            return current->value;
        }
        current = current->next;
    }
    return -1; // 未找到
}

6.2.2 开放寻址法

探查序列比较：

方法	探查序列	优点	缺点
线性探查	h(k,i) = (h'(k) + i) mod m	实现简单	聚集现象严重
二次探查	h(k,i) = (h'(k)+c₁i+c₂i²) mod m	减少聚集	可能无法探查所有槽
双重散列	h(k,i) = (h₁(k)+i*h₂(k)) mod m	最接近均匀探查	计算复杂

双重散列实现：

c 复制代码

#define TABLE_SIZE 10
#define EMPTY -1
#define DELETED -2

int hash1(int key) {
    return key % TABLE_SIZE;
}

int hash2(int key) {
    return 7 - (key % 7); // 确保不为0
}

int double_hash(int key, int i) {
    return (hash1(key) + i * hash2(key)) % TABLE_SIZE;
}

void insert_open_addressing(int table[], int key) {
    int i = 0;
    do {
        int j = double_hash(key, i);
        if (table[j] == EMPTY || table[j] == DELETED) {
            table[j] = key;
            return;
        }
        i++;
    } while (i < TABLE_SIZE);
    printf("Hash table overflow\n");
}

int search_open_addressing(int table[], int key) {
    int i = 0;
    do {
        int j = double_hash(key, i);
        if (table[j] == EMPTY) return -1;
        if (table[j] == key) return j;
        i++;
    } while (i < TABLE_SIZE);
    return -1;
}

7. 工程优化与性能分析

7.1 动态数组：STL vector 原理

c 复制代码

typedef struct {
    int *data;
    int size;
    int capacity;
} DynamicArray;

void init_dynamic_array(DynamicArray *arr, int capacity) {
    arr->data = (int *)malloc(capacity * sizeof(int));
    arr->size = 0;
    arr->capacity = capacity;
}

void push_back(DynamicArray *arr, int value) {
    if (arr->size >= arr->capacity) {
        // 扩容策略：倍增容量
        int new_capacity = arr->capacity * 2;
        int *new_data = (int *)realloc(arr->data, new_capacity * sizeof(int));
        if (new_data) {
            arr->data = new_data;
            arr->capacity = new_capacity;
        } else {
            printf("Memory allocation failed\n");
            return;
        }
    }
    arr->data[arr->size++] = value;
}

// 时间复杂度分析：均摊O(1)
// 数学证明：考虑连续插入n个元素的总成本
// T(n) = n + (1 + 2 + 4 + ... + 2^{k}) < n + 2^{k+1} < 3n
// ∴ 单次操作均摊成本 = T(n)/n < 3

7.2 内存池优化

c 复制代码

#define POOL_SIZE 1000

typedef struct {
    Node *nodes[POOL_SIZE];
    int free_index;
} MemoryPool;

MemoryPool *create_memory_pool() {
    MemoryPool *pool = (MemoryPool *)malloc(sizeof(MemoryPool));
    for (int i = 0; i < POOL_SIZE - 1; i++) {
        pool->nodes[i] = (Node *)malloc(sizeof(Node));
        pool->nodes[i]->next = i + 1; // 使用next指针连接空闲节点
    }
    pool->nodes[POOL_SIZE - 1] = (Node *)malloc(sizeof(Node));
    pool->nodes[POOL_SIZE - 1]->next = -1;
    pool->free_index = 0;
    return pool;
}

Node *allocate_node(MemoryPool *pool) {
    if (pool->free_index == -1) {
        printf("Memory pool exhausted\n");
        return NULL;
    }
    Node *node = pool->nodes[pool->free_index];
    pool->free_index = (int)(node->next); // 转换指针为索引
    return node;
}

void free_node(MemoryPool *pool, Node *node) {
    node->next = (Node *)(pool->free_index);
    pool->free_index = (int)(node - pool->nodes[0]); // 计算索引
}

总结与思考

本章深入探讨了数据结构的设计与实现：

基础结构：栈、队列、链表的实现与应用
内存管理：指针与数组表示的权衡
树结构：二叉树表示与遍历算法
散列表：冲突解决策略与性能优化
工程实践：动态数组、内存池等优化技术

关键洞见：没有完美的数据结构，只有最适合特定场景的选择。理解操作特性和性能瓶颈是设计高效系统的关键。

下章预告：第四章《高级设计与分析技术》将探讨：

分治策略的数学证明
动态规划的原理与应用
贪心算法的正确性证明
摊还分析的三种方法

本文完整代码已上传至GitHub仓库：Algorithm-Implementations

思考题：

在LRU缓存实现中，为什么使用双向链表而不是单向链表？
当散列表的装载因子超过阈值时，如何实现高效的重散列(rehashing)操作？
对比链式散列和开放寻址法，各自适合什么应用场景？
如何扩展二叉树结构以支持区间查询操作？