Linux环境下的C语言编程（四十六）

基础查找算法

朴素查找算法

这是最直观的查找方法，也是最容易理解的算法：

复制代码

#include <stdio.h>
#include <string.h>

// 朴素查找算法的完整实现
char* naive_strstr(const char *haystack, const char *needle) {
    // 输入验证
    if (haystack == NULL || needle == NULL) {
        return NULL;
    }
    
    // 空字符串处理
    if (*needle == '\0') {
        return (char*)haystack;  // 空串是任何字符串的子串
    }
    
    // 主查找逻辑
    const char *h = haystack;  // haystack指针
    const char *n = needle;    // needle指针
    const char *current_haystack_start = haystack;
    
    // 外层循环：遍历haystack的每个可能起始位置
    while (*current_haystack_start != '\0') {
        h = current_haystack_start;
        n = needle;
        
        // 内层循环：比较当前起始位置的字符串
        while (*h != '\0' && *n != '\0' && *h == *n) {
            h++;
            n++;
        }
        
        // 检查是否找到完整匹配
        if (*n == '\0') {
            // needle的所有字符都匹配了
            return (char*)current_haystack_start;
        }
        
        // 如果haystack剩余长度小于needle长度，提前结束
        size_t remaining_len = strlen(current_haystack_start);
        size_t needle_len = strlen(needle);
        if (remaining_len < needle_len) {
            break;
        }
        
        current_haystack_start++;  // 移动到下一个起始位置
    }
    
    return NULL;  // 未找到
}

// 带详细调试信息的朴素查找
char* naive_strstr_debug(const char *haystack, const char *needle, 
                        int *comparisons, int *attempts) {
    *comparisons = 0;
    *attempts = 0;
    
    if (haystack == NULL || needle == NULL) return NULL;
    if (*needle == '\0') return (char*)haystack;
    
    size_t haystack_len = strlen(haystack);
    size_t needle_len = strlen(needle);
    
    printf("查找开始:\n");
    printf("  Haystack: \"%s\" (长度: %zu)\n", haystack, haystack_len);
    printf("  Needle:   \"%s\" (长度: %zu)\n", needle, needle_len);
    printf("  最大比较次数: %zu\n\n", haystack_len - needle_len + 1);
    
    for (size_t i = 0; i <= haystack_len - needle_len; i++) {
        (*attempts)++;
        printf("尝试 %d: 起始位置 %zu\n", *attempts, i);
        
        size_t j;
        for (j = 0; j < needle_len; j++) {
            (*comparisons)++;
            printf("  比较 haystack[%zu]='%c' 和 needle[%zu]='%c'",
                   i + j, haystack[i + j], j, needle[j]);
            
            if (haystack[i + j] != needle[j]) {
                printf(" -> 不匹配\n");
                break;
            }
            printf(" -> 匹配\n");
        }
        
        if (j == needle_len) {
            printf("  找到匹配！位置: %zu\n", i);
            return (char*)(haystack + i);
        }
    }
    
    printf("未找到匹配\n");
    return NULL;
}

// 性能分析函数
void analyze_naive_search() {
    printf("=== 朴素查找算法性能分析 ===\n\n");
    
    // 测试用例
    struct {
        const char *haystack;
        const char *needle;
        const char *description;
    } test_cases[] = {
        {"ABCDABDABCDABCDABD", "ABCDABD", "最佳情况：快速找到"},
        {"AAAAAAAAAAAAAB", "AAAB", "最坏情况：大量回退"},
        {"Hello World", "World", "正常情况"},
        {"abc", "abcd", "needle比haystack长"},
        {"", "test", "空haystack"},
        {"test", "", "空needle"}
    };
    
    for (int i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); i++) {
        printf("测试 %d: %s\n", i+1, test_cases[i].description);
        printf("Haystack: \"%s\"\n", test_cases[i].haystack);
        printf("Needle:   \"%s\"\n", test_cases[i].needle);
        
        int comparisons, attempts;
        char *result = naive_strstr_debug(test_cases[i].haystack, 
                                         test_cases[i].needle,
                                         &comparisons, &attempts);
        
        printf("结果: %s\n", result ? "找到" : "未找到");
        printf("总尝试次数: %d, 总比较次数: %d\n\n", 
               attempts, comparisons);
        
        // 复杂度分析
        size_t n = strlen(test_cases[i].haystack);
        size_t m = strlen(test_cases[i].needle);
        printf("理论复杂度:\n");
        printf("  最佳情况: O(n+m)\n");
        printf("  最坏情况: O(n*m)\n");
        printf("  平均情况: O(n+m)\n\n");
    }
}

朴素算法的可视化演示

复制代码

void visualize_search_process() {
    printf("=== 朴素查找算法可视化演示 ===\n\n");
    
    const char *haystack = "ABABDABACDABABCABAB";
    const char *needle = "ABABCABAB";
    
    printf("Haystack: %s\n", haystack);
    printf("Needle:   %s\n\n", needle);
    
    size_t h_len = strlen(haystack);
    size_t n_len = strlen(needle);
    
    printf("查找过程:\n");
    printf("索引:    ");
    for (size_t i = 0; i < h_len; i++) {
        printf("%2zu", i);
    }
    printf("\nHaystack: ");
    for (size_t i = 0; i < h_len; i++) {
        printf("%2c", haystack[i]);
    }
    printf("\n\n");
    
    for (size_t i = 0; i <= h_len - n_len; i++) {
        printf("尝试从位置 %zu 开始:\n", i);
        printf("Haystack: ");
        
        // 显示当前对齐
        for (size_t j = 0; j < h_len; j++) {
            if (j == i) printf("[");
            printf("%c", haystack[j]);
            if (j == i + n_len - 1) printf("]");
        }
        printf("\n");
        
        printf("Needle:   ");
        for (size_t j = 0; j < i; j++) printf("  ");
        for (size_t j = 0; j < n_len; j++) {
            printf("%c", needle[j]);
        }
        printf("\n");
        
        // 进行比较
        int match = 1;
        for (size_t j = 0; j < n_len; j++) {
            if (haystack[i + j] != needle[j]) {
                match = 0;
                printf("在位置 %zu 不匹配: '%c' != '%c'\n\n", 
                       j, haystack[i + j], needle[j]);
                break;
            }
        }
        
        if (match) {
            printf("完全匹配！找到位置: %zu\n\n", i);
            break;
        }
    }
}

4.1.2 KMP算法

KMP算法通过利用已匹配的部分信息，避免不必要的回溯，显著提高了查找效率。

4.1.2.1 前缀函数

复制代码

#include <stdlib.h>
#include <string.h>

// 计算部分匹配表（LPS：最长前缀后缀）
int* compute_lps(const char *pattern, int *lps_length) {
    int pattern_len = strlen(pattern);
    int *lps = (int*)malloc(pattern_len * sizeof(int));
    if (lps == NULL) return NULL;
    
    // lps[0] 总是 0
    lps[0] = 0;
    
    // length 表示当前最长前缀后缀的长度
    int length = 0;
    int i = 1;
    
    while (i < pattern_len) {
        if (pattern[i] == pattern[length]) {
            // 字符匹配，长度加1
            length++;
            lps[i] = length;
            i++;
        } else {
            if (length != 0) {
                // 回退到前一个最长前缀后缀的长度
                // 这个步骤是关键，避免了从头开始匹配
                length = lps[length - 1];
            } else {
                // 没有匹配的前缀，设置lps[i]为0
                lps[i] = 0;
                i++;
            }
        }
    }
    
    *lps_length = pattern_len;
    return lps;
}

// 可视化LPS表计算过程
void visualize_lps_computation(const char *pattern) {
    printf("=== LPS表计算过程可视化 ===\n\n");
    printf("模式串: %s\n", pattern);
    printf("长度: %zu\n\n", strlen(pattern));
    
    int len = strlen(pattern);
    int *lps = (int*)malloc(len * sizeof(int));
    lps[0] = 0;
    
    printf("初始化: lps[0] = 0\n\n");
    
    int length = 0;
    for (int i = 1; i < len; i++) {
        printf("计算 lps[%d]:\n", i);
        printf("  当前字符: pattern[%d] = '%c'\n", i, pattern[i]);
        printf("  当前最长前缀后缀长度: length = %d\n", length);
        printf("  比较: pattern[%d]('%c') vs pattern[%d]('%c')",
               i, pattern[i], length, pattern[length]);
        
        if (pattern[i] == pattern[length]) {
            length++;
            lps[i] = length;
            printf(" -> 匹配，length++ = %d\n", length);
        } else {
            if (length != 0) {
                printf(" -> 不匹配，回退: length = lps[%d-1] = %d\n", 
                       length, lps[length - 1]);
                length = lps[length - 1];
                i--;  // 重新尝试当前i
            } else {
                lps[i] = 0;
                printf(" -> 不匹配，lps[%d] = 0\n", i);
            }
        }
    }
    
    printf("\n最终LPS表:\n");
    printf("索引: ");
    for (int i = 0; i < len; i++) {
        printf("%3d", i);
    }
    printf("\n字符: ");
    for (int i = 0; i < len; i++) {
        printf("%3c", pattern[i]);
    }
    printf("\nLPS : ");
    for (int i = 0; i < len; i++) {
        printf("%3d", lps[i]);
    }
    printf("\n\n");
    
    // 解释LPS表的含义
    printf("LPS表解释:\n");
    for (int i = 0; i < len; i++) {
        if (lps[i] > 0) {
            printf("  lps[%d]=%d: 前缀 \"%.*s\" 也是后缀\n", 
                   i, lps[i], lps[i], pattern);
        }
    }
    
    free(lps);
}

4.1.2.2 KMP查找算法实现

复制代码

c

// KMP算法主函数
char* kmp_search(const char *text, const char *pattern) {
    if (text == NULL || pattern == NULL) return NULL;
    
    int text_len = strlen(text);
    int pattern_len = strlen(pattern);
    
    // 特殊情况处理
    if (pattern_len == 0) return (char*)text;
    if (pattern_len > text_len) return NULL;
    
    // 计算LPS表
    int lps_len;
    int *lps = compute_lps(pattern, &lps_len);
    if (lps == NULL) return NULL;
    
    printf("KMP查找开始:\n");
    printf("文本: \"%s\" (长度: %d)\n", text, text_len);
    printf("模式: \"%s\" (长度: %d)\n", pattern, pattern_len);
    printf("LPS表已计算完成\n\n");
    
    int i = 0;  // text的索引
    int j = 0;  // pattern的索引
    int comparisons = 0;
    
    while (i < text_len) {
        comparisons++;
        printf("比较: text[%d]='%c' vs pattern[%d]='%c'", 
               i, text[i], j, pattern[j]);
        
        if (pattern[j] == text[i]) {
            printf(" -> 匹配\n");
            i++;
            j++;
            
            if (j == pattern_len) {
                printf("\n找到匹配！位置: %d\n", i - j);
                printf("总比较次数: %d\n", comparisons);
                free(lps);
                return (char*)(text + (i - j));
            }
        } else {
            printf(" -> 不匹配\n");
            if (j != 0) {
                // 利用LPS表跳转，避免回溯
                printf("  使用LPS表跳转: j = lps[%d-1] = %d\n", 
                       j, lps[j - 1]);
                j = lps[j - 1];
            } else {
                i++;
            }
        }
    }
    
    printf("\n未找到匹配\n");
    printf("总比较次数: %d\n", comparisons);
    free(lps);
    return NULL;
}

// 优化版本的KMP（减少打印输出）
char* kmp_search_optimized(const char *text, const char *pattern) {
    if (text == NULL || pattern == NULL) return NULL;
    
    int n = strlen(text);
    int m = strlen(pattern);
    
    if (m == 0) return (char*)text;
    if (m > n) return NULL;
    
    // 计算LPS
    int *lps = (int*)malloc(m * sizeof(int));
    if (lps == NULL) return NULL;
    
    // 计算LPS（内联版本）
    lps[0] = 0;
    int len = 0;
    int i = 1;
    
    while (i < m) {
        if (pattern[i] == pattern[len]) {
            len++;
            lps[i] = len;
            i++;
        } else {
            if (len != 0) {
                len = lps[len - 1];
            } else {
                lps[i] = 0;
                i++;
            }
        }
    }
    
    // KMP搜索
    i = 0;  // text索引
    int j = 0;  // pattern索引
    
    while (i < n) {
        if (pattern[j] == text[i]) {
            i++;
            j++;
            
            if (j == m) {
                free(lps);
                return (char*)(text + (i - j));
            }
        } else {
            if (j != 0) {
                j = lps[j - 1];
            } else {
                i++;
            }
        }
    }
    
    free(lps);
    return NULL;
}

复制代码