基础查找算法
朴素查找算法
这是最直观的查找方法,也是最容易理解的算法:
#include <stdio.h>
#include <string.h>
// 朴素查找算法的完整实现
char* naive_strstr(const char *haystack, const char *needle) {
// 输入验证
if (haystack == NULL || needle == NULL) {
return NULL;
}
// 空字符串处理
if (*needle == '\0') {
return (char*)haystack; // 空串是任何字符串的子串
}
// 主查找逻辑
const char *h = haystack; // haystack指针
const char *n = needle; // needle指针
const char *current_haystack_start = haystack;
// 外层循环:遍历haystack的每个可能起始位置
while (*current_haystack_start != '\0') {
h = current_haystack_start;
n = needle;
// 内层循环:比较当前起始位置的字符串
while (*h != '\0' && *n != '\0' && *h == *n) {
h++;
n++;
}
// 检查是否找到完整匹配
if (*n == '\0') {
// needle的所有字符都匹配了
return (char*)current_haystack_start;
}
// 如果haystack剩余长度小于needle长度,提前结束
size_t remaining_len = strlen(current_haystack_start);
size_t needle_len = strlen(needle);
if (remaining_len < needle_len) {
break;
}
current_haystack_start++; // 移动到下一个起始位置
}
return NULL; // 未找到
}
// 带详细调试信息的朴素查找
char* naive_strstr_debug(const char *haystack, const char *needle,
int *comparisons, int *attempts) {
*comparisons = 0;
*attempts = 0;
if (haystack == NULL || needle == NULL) return NULL;
if (*needle == '\0') return (char*)haystack;
size_t haystack_len = strlen(haystack);
size_t needle_len = strlen(needle);
printf("查找开始:\n");
printf(" Haystack: \"%s\" (长度: %zu)\n", haystack, haystack_len);
printf(" Needle: \"%s\" (长度: %zu)\n", needle, needle_len);
printf(" 最大比较次数: %zu\n\n", haystack_len - needle_len + 1);
for (size_t i = 0; i <= haystack_len - needle_len; i++) {
(*attempts)++;
printf("尝试 %d: 起始位置 %zu\n", *attempts, i);
size_t j;
for (j = 0; j < needle_len; j++) {
(*comparisons)++;
printf(" 比较 haystack[%zu]='%c' 和 needle[%zu]='%c'",
i + j, haystack[i + j], j, needle[j]);
if (haystack[i + j] != needle[j]) {
printf(" -> 不匹配\n");
break;
}
printf(" -> 匹配\n");
}
if (j == needle_len) {
printf(" 找到匹配!位置: %zu\n", i);
return (char*)(haystack + i);
}
}
printf("未找到匹配\n");
return NULL;
}
// 性能分析函数
void analyze_naive_search() {
printf("=== 朴素查找算法性能分析 ===\n\n");
// 测试用例
struct {
const char *haystack;
const char *needle;
const char *description;
} test_cases[] = {
{"ABCDABDABCDABCDABD", "ABCDABD", "最佳情况:快速找到"},
{"AAAAAAAAAAAAAB", "AAAB", "最坏情况:大量回退"},
{"Hello World", "World", "正常情况"},
{"abc", "abcd", "needle比haystack长"},
{"", "test", "空haystack"},
{"test", "", "空needle"}
};
for (int i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); i++) {
printf("测试 %d: %s\n", i+1, test_cases[i].description);
printf("Haystack: \"%s\"\n", test_cases[i].haystack);
printf("Needle: \"%s\"\n", test_cases[i].needle);
int comparisons, attempts;
char *result = naive_strstr_debug(test_cases[i].haystack,
test_cases[i].needle,
&comparisons, &attempts);
printf("结果: %s\n", result ? "找到" : "未找到");
printf("总尝试次数: %d, 总比较次数: %d\n\n",
attempts, comparisons);
// 复杂度分析
size_t n = strlen(test_cases[i].haystack);
size_t m = strlen(test_cases[i].needle);
printf("理论复杂度:\n");
printf(" 最佳情况: O(n+m)\n");
printf(" 最坏情况: O(n*m)\n");
printf(" 平均情况: O(n+m)\n\n");
}
}
朴素算法的可视化演示
void visualize_search_process() {
printf("=== 朴素查找算法可视化演示 ===\n\n");
const char *haystack = "ABABDABACDABABCABAB";
const char *needle = "ABABCABAB";
printf("Haystack: %s\n", haystack);
printf("Needle: %s\n\n", needle);
size_t h_len = strlen(haystack);
size_t n_len = strlen(needle);
printf("查找过程:\n");
printf("索引: ");
for (size_t i = 0; i < h_len; i++) {
printf("%2zu", i);
}
printf("\nHaystack: ");
for (size_t i = 0; i < h_len; i++) {
printf("%2c", haystack[i]);
}
printf("\n\n");
for (size_t i = 0; i <= h_len - n_len; i++) {
printf("尝试从位置 %zu 开始:\n", i);
printf("Haystack: ");
// 显示当前对齐
for (size_t j = 0; j < h_len; j++) {
if (j == i) printf("[");
printf("%c", haystack[j]);
if (j == i + n_len - 1) printf("]");
}
printf("\n");
printf("Needle: ");
for (size_t j = 0; j < i; j++) printf(" ");
for (size_t j = 0; j < n_len; j++) {
printf("%c", needle[j]);
}
printf("\n");
// 进行比较
int match = 1;
for (size_t j = 0; j < n_len; j++) {
if (haystack[i + j] != needle[j]) {
match = 0;
printf("在位置 %zu 不匹配: '%c' != '%c'\n\n",
j, haystack[i + j], needle[j]);
break;
}
}
if (match) {
printf("完全匹配!找到位置: %zu\n\n", i);
break;
}
}
}
4.1.2 KMP算法
KMP算法通过利用已匹配的部分信息,避免不必要的回溯,显著提高了查找效率。
4.1.2.1 前缀函数
c
#include <stdlib.h>
#include <string.h>
// 计算部分匹配表(LPS:最长前缀后缀)
int* compute_lps(const char *pattern, int *lps_length) {
int pattern_len = strlen(pattern);
int *lps = (int*)malloc(pattern_len * sizeof(int));
if (lps == NULL) return NULL;
// lps[0] 总是 0
lps[0] = 0;
// length 表示当前最长前缀后缀的长度
int length = 0;
int i = 1;
while (i < pattern_len) {
if (pattern[i] == pattern[length]) {
// 字符匹配,长度加1
length++;
lps[i] = length;
i++;
} else {
if (length != 0) {
// 回退到前一个最长前缀后缀的长度
// 这个步骤是关键,避免了从头开始匹配
length = lps[length - 1];
} else {
// 没有匹配的前缀,设置lps[i]为0
lps[i] = 0;
i++;
}
}
}
*lps_length = pattern_len;
return lps;
}
// 可视化LPS表计算过程
void visualize_lps_computation(const char *pattern) {
printf("=== LPS表计算过程可视化 ===\n\n");
printf("模式串: %s\n", pattern);
printf("长度: %zu\n\n", strlen(pattern));
int len = strlen(pattern);
int *lps = (int*)malloc(len * sizeof(int));
lps[0] = 0;
printf("初始化: lps[0] = 0\n\n");
int length = 0;
for (int i = 1; i < len; i++) {
printf("计算 lps[%d]:\n", i);
printf(" 当前字符: pattern[%d] = '%c'\n", i, pattern[i]);
printf(" 当前最长前缀后缀长度: length = %d\n", length);
printf(" 比较: pattern[%d]('%c') vs pattern[%d]('%c')",
i, pattern[i], length, pattern[length]);
if (pattern[i] == pattern[length]) {
length++;
lps[i] = length;
printf(" -> 匹配,length++ = %d\n", length);
} else {
if (length != 0) {
printf(" -> 不匹配,回退: length = lps[%d-1] = %d\n",
length, lps[length - 1]);
length = lps[length - 1];
i--; // 重新尝试当前i
} else {
lps[i] = 0;
printf(" -> 不匹配,lps[%d] = 0\n", i);
}
}
}
printf("\n最终LPS表:\n");
printf("索引: ");
for (int i = 0; i < len; i++) {
printf("%3d", i);
}
printf("\n字符: ");
for (int i = 0; i < len; i++) {
printf("%3c", pattern[i]);
}
printf("\nLPS : ");
for (int i = 0; i < len; i++) {
printf("%3d", lps[i]);
}
printf("\n\n");
// 解释LPS表的含义
printf("LPS表解释:\n");
for (int i = 0; i < len; i++) {
if (lps[i] > 0) {
printf(" lps[%d]=%d: 前缀 \"%.*s\" 也是后缀\n",
i, lps[i], lps[i], pattern);
}
}
free(lps);
}
4.1.2.2 KMP查找算法实现
c
// KMP算法主函数
char* kmp_search(const char *text, const char *pattern) {
if (text == NULL || pattern == NULL) return NULL;
int text_len = strlen(text);
int pattern_len = strlen(pattern);
// 特殊情况处理
if (pattern_len == 0) return (char*)text;
if (pattern_len > text_len) return NULL;
// 计算LPS表
int lps_len;
int *lps = compute_lps(pattern, &lps_len);
if (lps == NULL) return NULL;
printf("KMP查找开始:\n");
printf("文本: \"%s\" (长度: %d)\n", text, text_len);
printf("模式: \"%s\" (长度: %d)\n", pattern, pattern_len);
printf("LPS表已计算完成\n\n");
int i = 0; // text的索引
int j = 0; // pattern的索引
int comparisons = 0;
while (i < text_len) {
comparisons++;
printf("比较: text[%d]='%c' vs pattern[%d]='%c'",
i, text[i], j, pattern[j]);
if (pattern[j] == text[i]) {
printf(" -> 匹配\n");
i++;
j++;
if (j == pattern_len) {
printf("\n找到匹配!位置: %d\n", i - j);
printf("总比较次数: %d\n", comparisons);
free(lps);
return (char*)(text + (i - j));
}
} else {
printf(" -> 不匹配\n");
if (j != 0) {
// 利用LPS表跳转,避免回溯
printf(" 使用LPS表跳转: j = lps[%d-1] = %d\n",
j, lps[j - 1]);
j = lps[j - 1];
} else {
i++;
}
}
}
printf("\n未找到匹配\n");
printf("总比较次数: %d\n", comparisons);
free(lps);
return NULL;
}
// 优化版本的KMP(减少打印输出)
char* kmp_search_optimized(const char *text, const char *pattern) {
if (text == NULL || pattern == NULL) return NULL;
int n = strlen(text);
int m = strlen(pattern);
if (m == 0) return (char*)text;
if (m > n) return NULL;
// 计算LPS
int *lps = (int*)malloc(m * sizeof(int));
if (lps == NULL) return NULL;
// 计算LPS(内联版本)
lps[0] = 0;
int len = 0;
int i = 1;
while (i < m) {
if (pattern[i] == pattern[len]) {
len++;
lps[i] = len;
i++;
} else {
if (len != 0) {
len = lps[len - 1];
} else {
lps[i] = 0;
i++;
}
}
}
// KMP搜索
i = 0; // text索引
int j = 0; // pattern索引
while (i < n) {
if (pattern[j] == text[i]) {
i++;
j++;
if (j == m) {
free(lps);
return (char*)(text + (i - j));
}
} else {
if (j != 0) {
j = lps[j - 1];
} else {
i++;
}
}
}
free(lps);
return NULL;
}