一、归并排序算法概述
归并排序是John von Neumann在1945年提出的一种高效排序算法,采用分治策略。其核心思想是将两个有序序列合并成一个有序序列,通过递归地将待排序序列分割成若干子序列,分别排序后再合并。
基本特性:
时间复杂度:O(nlogn)
空间复杂度:O(n)
稳定性:稳定排序
适用性:适合大规模数据排序
二、归并排序核心思想
2.1 分治策略
归并排序遵循三个步骤:
- 分割:将数组分成两半
- 解决:递归地排序两个子数组
- 合并:将两个有序子数组合并成一个有序数组
2.2 合并过程
合并是将两个有序数组合并成一个有序数组的过程,这是归并排序的关键操作。
三、归并排序过程详解
3.1 排序过程图解
以数组 [38, 27, 43, 3, 9, 82, 10] 为例:
分割过程:
[38, 27, 43, 3, 9, 82, 10]
[38, 27, 43, 3] [9, 82, 10]
[38, 27] [43, 3] [9, 82] [10]
[38] [27] [43] [3] [9] [82] [10]
合并过程:
[27, 38] [3, 43] [9, 82] [10]
[3, 27, 38, 43] [9, 10, 82]
[3, 9, 10, 27, 38, 43, 82]
四、递归实现归并排序
#include <stdio.h>
#include <stdlib.h>
// Merge the adjacent sorted runs arr[left..mid] and arr[mid+1..right]
// (all bounds inclusive) into one sorted run stored in arr[left..right].
//
// When the heads of both runs compare equal, the element from the left run
// is taken first, so equal elements keep their relative order.
//
// A single scratch buffer holding both runs is allocated per call. If that
// allocation fails, the right run is inserted into the left run in place:
// no extra memory is needed, at the cost of quadratic worst-case time.
void merge(int arr[], int left, int mid, int right) {
    int n1 = mid - left + 1;  // length of the left run
    int n2 = right - mid;     // length of the right run

    // With an empty run there is nothing to interleave.
    if (n1 <= 0 || n2 <= 0) {
        return;
    }

    int *buf = malloc(((size_t)n1 + (size_t)n2) * sizeof *buf);
    if (buf == NULL) {
        // Out of memory: insert each right-run element into the sorted
        // prefix. The strict '>' comparison keeps the merge stable.
        for (int r = mid + 1; r <= right; r++) {
            int key = arr[r];
            int p = r - 1;
            while (p >= left && arr[p] > key) {
                arr[p + 1] = arr[p];
                p--;
            }
            arr[p + 1] = key;
        }
        return;
    }

    // L and R are views into the one buffer: left run first, right run after.
    int *L = buf;
    int *R = buf + n1;
    for (int i = 0; i < n1; i++) {
        L[i] = arr[left + i];
    }
    for (int j = 0; j < n2; j++) {
        R[j] = arr[mid + 1 + j];
    }

    int i = 0, j = 0, k = left;
    while (i < n1 && j < n2) {
        if (L[i] <= R[j]) {
            arr[k++] = L[i++];
        } else {
            arr[k++] = R[j++];
        }
    }
    // At most one of the two runs still has elements left.
    while (i < n1) {
        arr[k++] = L[i++];
    }
    while (j < n2) {
        arr[k++] = R[j++];
    }

    free(buf);
}
// Sort arr[left..right] (inclusive bounds) in ascending order: split the
// range at its midpoint, sort each half recursively, then merge the halves.
void mergeSortRecursive(int arr[], int left, int right) {
    // A range with fewer than two elements needs no work.
    if (left >= right) {
        return;
    }

    // Computed from the range width so that left + right is never formed.
    const int middle = left + (right - left) / 2;

    mergeSortRecursive(arr, left, middle);
    mergeSortRecursive(arr, middle + 1, right);
    merge(arr, left, middle, right);
}
// Print the first n elements of arr on one line, each followed by a space,
// then end the line.
void printArray(int arr[], int n) {
    int idx = 0;
    while (idx < n) {
        printf("%d ", arr[idx]);
        idx++;
    }
    printf("\n");
}
// Demo driver: print the sample array, sort it with mergeSortRecursive,
// and print it again.
int main() {
    int samples[] = {38, 27, 43, 3, 9, 82, 10};
    // Element count taken from the array's own size; this works only
    // because samples is a real array in this scope, not a pointer.
    const int count = (int)(sizeof samples / sizeof samples[0]);

    printf("原数组: ");
    printArray(samples, count);

    mergeSortRecursive(samples, 0, count - 1);

    printf("排序后: ");
    printArray(samples, count);

    return 0;
}
五、非递归实现归并排序
#include <stdio.h>
#include <stdlib.h>
// Merge the adjacent sorted runs arr[left..mid] and arr[mid+1..right]
// (all bounds inclusive) into one sorted run stored in arr[left..right].
//
// When the heads of both runs compare equal, the element from the left run
// is taken first, so equal elements keep their relative order.
//
// A single scratch buffer holding both runs is allocated per call. If that
// allocation fails, the right run is inserted into the left run in place:
// no extra memory is needed, at the cost of quadratic worst-case time.
void merge(int arr[], int left, int mid, int right) {
    int n1 = mid - left + 1;  // length of the left run
    int n2 = right - mid;     // length of the right run

    // With an empty run there is nothing to interleave.
    if (n1 <= 0 || n2 <= 0) {
        return;
    }

    int *buf = malloc(((size_t)n1 + (size_t)n2) * sizeof *buf);
    if (buf == NULL) {
        // Out of memory: insert each right-run element into the sorted
        // prefix. The strict '>' comparison keeps the merge stable.
        for (int r = mid + 1; r <= right; r++) {
            int key = arr[r];
            int p = r - 1;
            while (p >= left && arr[p] > key) {
                arr[p + 1] = arr[p];
                p--;
            }
            arr[p + 1] = key;
        }
        return;
    }

    // L and R are views into the one buffer: left run first, right run after.
    int *L = buf;
    int *R = buf + n1;
    for (int i = 0; i < n1; i++) {
        L[i] = arr[left + i];
    }
    for (int j = 0; j < n2; j++) {
        R[j] = arr[mid + 1 + j];
    }

    int i = 0, j = 0, k = left;
    while (i < n1 && j < n2) {
        if (L[i] <= R[j]) {
            arr[k++] = L[i++];
        } else {
            arr[k++] = R[j++];
        }
    }
    // At most one of the two runs still has elements left.
    while (i < n1) {
        arr[k++] = L[i++];
    }
    while (j < n2) {
        arr[k++] = R[j++];
    }

    free(buf);
}
// Bottom-up merge sort of arr[0..n-1]: merge neighbouring runs of width
// 1, 2, 4, ... until a single run covers the whole array.
void mergeSortNonRecursive(int arr[], int n) {
    const int last = n - 1;  // index of the final element
    int width = 1;           // length of the runs merged in this pass

    while (width <= last) {
        int start = 0;
        while (start < last) {
            int mid = start + width - 1;
            // No right-hand run remains: the tail is already one sorted run.
            if (mid >= last) {
                break;
            }

            // The right run may be shorter than width at the array's end.
            int end = start + 2 * width - 1;
            if (end > last) {
                end = last;
            }

            merge(arr, start, mid, end);
            start += 2 * width;
        }
        width = 2 * width;
    }
}
// Demo driver: print the sample array, sort it with mergeSortNonRecursive,
// and print the sorted result.
int main() {
    int values[] = {38, 27, 43, 3, 9, 82, 10};
    const int count = (int)(sizeof values / sizeof values[0]);
    int idx;

    printf("原数组: ");
    idx = 0;
    while (idx < count) {
        printf("%d ", values[idx]);
        idx++;
    }
    printf("\n");

    mergeSortNonRecursive(values, count);

    printf("非递归排序后: ");
    idx = 0;
    while (idx < count) {
        printf("%d ", values[idx]);
        idx++;
    }
    printf("\n");

    return 0;
}
六、复杂度分析与性能比较
6.1 时间复杂度分析
最好情况:O(nlogn)
平均情况:O(nlogn)
最坏情况:O(nlogn)
归并排序的时间复杂度始终是O(nlogn),这是它的重要优势。
6.2 空间复杂度分析
空间复杂度:O(n) - 需要额外的数组空间来合并
6.3 稳定性分析
归并排序是稳定的排序算法,因为合并过程中相同元素的相对位置不会改变。
七、归并排序优化策略
7.1 小数组使用插入排序
// Insertion sort of the inclusive range arr[left..right], ascending.
// Elements outside the range are not touched.
void insertSort(int arr[], int left, int right) {
    for (int pos = left + 1; pos <= right; ++pos) {
        const int value = arr[pos];
        int hole = pos;

        // Slide larger elements one slot to the right until the hole sits
        // where value belongs. The strict '>' leaves equal elements in order.
        for (; hole > left && arr[hole - 1] > value; --hole) {
            arr[hole] = arr[hole - 1];
        }
        arr[hole] = value;
    }
}
// Merge sort of arr[left..right] with two shortcuts: ranges whose bounds
// differ by at most 15 are handed to insertSort, and the merge step is
// skipped when the two sorted halves are already in order.
void optimizedMergeSort(int arr[], int left, int right) {
    const int span = right - left;

    if (span > 15) {
        const int middle = left + span / 2;
        optimizedMergeSort(arr, left, middle);
        optimizedMergeSort(arr, middle + 1, right);

        // Merge only if the last element of the left half is larger than
        // the first element of the right half.
        if (arr[middle] > arr[middle + 1]) {
            merge(arr, left, middle, right);
        }
    } else {
        insertSort(arr, left, right);
    }
}
7.2 避免频繁内存分配
// Merge the sorted runs arr[left..mid] and arr[mid+1..right] using the
// caller-supplied scratch array temp, then copy the result back into arr.
// temp is indexed with the same positions as arr, so it must be valid for
// indices left..right.
void mergeWithTemp(int arr[], int temp[], int left, int mid, int right) {
    int lhs = left;     // next unread element of the left run
    int rhs = mid + 1;  // next unread element of the right run
    int out = left;     // next slot to fill in temp

    for (; lhs <= mid && rhs <= right; ++out) {
        // The right element wins only when strictly smaller, so ties are
        // taken from the left run.
        if (arr[rhs] < arr[lhs]) {
            temp[out] = arr[rhs];
            ++rhs;
        } else {
            temp[out] = arr[lhs];
            ++lhs;
        }
    }
    for (; lhs <= mid; ++lhs, ++out) {
        temp[out] = arr[lhs];
    }
    for (; rhs <= right; ++rhs, ++out) {
        temp[out] = arr[rhs];
    }

    for (int pos = left; pos <= right; ++pos) {
        arr[pos] = temp[pos];
    }
}
// Recursive merge sort of arr[left..right] that reuses one caller-provided
// scratch array (temp) for every merge instead of allocating per merge.
void mergeSortWithHelper(int arr[], int temp[], int left, int right) {
    // Fewer than two elements: nothing to sort.
    if (left >= right) {
        return;
    }

    const int middle = left + (right - left) / 2;
    mergeSortWithHelper(arr, temp, left, middle);
    mergeSortWithHelper(arr, temp, middle + 1, right);
    mergeWithTemp(arr, temp, left, middle, right);
}
八、归并排序与其他排序算法比较
| 特性 | 归并排序 | 快速排序 | 堆排序 |
|---|---|---|---|
| 时间复杂度 | O(nlogn) | 平均O(nlogn),最坏O(n²) | O(nlogn) |
| 空间复杂度 | O(n) | O(logn) | O(1) |
| 稳定性 | 稳定 | 不稳定 | 不稳定 |
| 适用场景 | 大数据量、需要稳定 | 通用、随机数据 | 内存受限 |
九、使用注意事项与最佳实践
9.1 适用场景
- 大数据排序:时间复杂度稳定在O(nlogn)
- 需要稳定排序:相同元素相对位置不变
- 链表排序:特别适合链表数据结构
- 外部排序:适合处理无法一次性装入内存的大文件
9.2 注意事项
- 内存使用:需要O(n)额外空间,内存受限时需谨慎
- 递归深度:递归版本的递归深度约为logn,一般不会栈溢出;栈空间受限时可考虑非递归版本
- 常数因子:实际运行常数较大,小数据不如插入排序
- 实现复杂度:相比简单排序算法实现较复杂
9.3 最佳实践建议
// Recommended merge sort implementation.

// In-place insertion sort of arr[0..n-1]; used for small inputs and as the
// no-memory fallback of recommendedMergeSort.
static void recommendedInsertionSort(int arr[], int n) {
    for (int i = 1; i < n; i++) {
        int key = arr[i];
        int j = i - 1;
        while (j >= 0 && arr[j] > key) {
            arr[j + 1] = arr[j];
            j--;
        }
        arr[j + 1] = key;
    }
}

// Sort arr[0..n-1] in ascending order, keeping equal elements in their
// original relative order.
//
// Arrays of at most 16 elements are sorted by insertion sort. Larger arrays
// use a bottom-up merge sort with one scratch buffer of n ints allocated
// for the whole sort. If that buffer cannot be allocated, the array is
// still sorted, by falling back to in-place insertion sort (quadratic time).
void recommendedMergeSort(int arr[], int n) {
    if (n <= 1) {
        return;
    }
    if (n <= 16) {
        recommendedInsertionSort(arr, n);
        return;
    }

    int *temp = malloc((size_t)n * sizeof *temp);
    if (temp == NULL) {
        // Never hand back an unsorted array just because memory ran out.
        recommendedInsertionSort(arr, n);
        return;
    }

    int width = 1;  // length of the sorted runs merged in this pass
    while (width < n) {
        // left < n - width guarantees a non-empty right run, so passes over
        // a lone trailing run are skipped instead of copied back and forth.
        for (int left = 0; left < n - width; ) {
            int mid = left + width - 1;
            // Clamp the right run to the array end without ever computing
            // an index beyond n - 1.
            int right = (n - 1 - mid >= width) ? mid + width : n - 1;

            int i = left, j = mid + 1, k = left;
            while (i <= mid && j <= right) {
                // '<=' takes ties from the left run, keeping the sort stable.
                if (arr[i] <= arr[j]) {
                    temp[k++] = arr[i++];
                } else {
                    temp[k++] = arr[j++];
                }
            }
            while (i <= mid) {
                temp[k++] = arr[i++];
            }
            while (j <= right) {
                temp[k++] = arr[j++];
            }
            for (i = left; i <= right; i++) {
                arr[i] = temp[i];
            }

            left = right + 1;
        }

        // Doubling is pointless once 2 * width >= n, and stopping here also
        // keeps width from overflowing int for very large n.
        if (width > (n - 1) / 2) {
            break;
        }
        width *= 2;
    }

    free(temp);
}
十、归并排序实际应用
10.1 链表排序应用
#include <stdio.h>
#include <stdlib.h>
// Node of a singly linked list that stores one int per node.
typedef struct ListNode {
// Value stored in this node; the list functions in this file order nodes by it.
int val;
// Link to the following node; the list functions in this file treat a null
// pointer here as the end of the list.
struct ListNode *next;
} ListNode;
// Splice two sorted lists into one sorted list and return its head.
// Nodes are relinked, not copied. When both heads hold equal values the
// node from l1 is linked first.
ListNode* mergeTwoLists(ListNode* l1, ListNode* l2) {
    ListNode *merged = NULL;
    // link always points at the pointer that must receive the next node:
    // first the head variable, afterwards the last linked node's next field.
    ListNode **link = &merged;

    while (l1 != NULL && l2 != NULL) {
        ListNode *taken;
        if (l2->val < l1->val) {
            taken = l2;
            l2 = l2->next;
        } else {
            taken = l1;
            l1 = l1->next;
        }
        *link = taken;
        link = &taken->next;
    }

    // Attach whatever remains of the list that is not yet exhausted.
    *link = (l1 != NULL) ? l1 : l2;
    return merged;
}
// Merge sort for a singly linked list: cut the list in two at its middle,
// sort both parts recursively, and merge them. Returns the new head.
ListNode* sortList(ListNode* head) {
    // Empty and single-node lists are already sorted.
    if (head == NULL || head->next == NULL) {
        return head;
    }

    // fast moves two nodes per step and slow one, so slow stops on the last
    // node of the first half.
    ListNode *slow = head;
    for (ListNode *fast = head->next;
         fast != NULL && fast->next != NULL;
         fast = fast->next->next) {
        slow = slow->next;
    }

    // Detach the second half from the first.
    ListNode *secondHalf = slow->next;
    slow->next = NULL;

    ListNode *sortedFront = sortList(head);
    ListNode *sortedBack = sortList(secondHalf);
    return mergeTwoLists(sortedFront, sortedBack);
}
10.2 外部排序应用
#include <stdio.h>
// Print a short outline of the steps of an external merge sort.
// This function only writes text to standard output; it sorts nothing.
void externalMergeSortExample() {
    static const char *const outline[] = {
        "外部排序示例:\n",
        "1. 将大文件分割成多个小块\n",
        "2. 对每个小块在内存中排序\n",
        "3. 使用归并排序合并所有有序块\n",
        "4. 生成最终有序文件\n",
    };
    const size_t lineCount = sizeof outline / sizeof outline[0];

    for (size_t idx = 0; idx < lineCount; idx++) {
        printf("%s", outline[idx]);
    }
}
十一、常见面试题精讲
11.1 基础概念题
- 归并排序的时间复杂度为什么是O(nlogn)?
  答:每次分割成两半,共约logn层;每层合并总共需要O(n)时间,因此总时间为O(nlogn)
- 归并排序为什么是稳定的?
  答:合并时遇到相等元素优先选择前一个子数组的元素
- 归并排序和快速排序的主要区别是什么?
  答:归并排序稳定但需要额外空间,快速排序不稳定但原地排序
11.2 编码实现题
// Exercise 1: count the number of inversions in an array.
//
// Merge the sorted runs arr[left..mid] and arr[mid+1..right] through the
// scratch array temp, copy the merged run back into arr, and return how many
// pairs (one element from each run) had the left element strictly greater
// than the right element.
int mergeAndCount(int arr[], int temp[], int left, int mid, int right) {
    int lhs = left;      // next unread element of the left run
    int rhs = mid + 1;   // next unread element of the right run
    int out = left;      // next slot to fill in temp
    int inversions = 0;

    for (; lhs <= mid && rhs <= right; ++out) {
        if (arr[lhs] <= arr[rhs]) {
            temp[out] = arr[lhs++];
        } else {
            // arr[rhs] is smaller than arr[lhs] and, because the left run is
            // sorted, than every element after it up to mid.
            inversions += mid - lhs + 1;
            temp[out] = arr[rhs++];
        }
    }
    for (; lhs <= mid; ++lhs, ++out) {
        temp[out] = arr[lhs];
    }
    for (; rhs <= right; ++rhs, ++out) {
        temp[out] = arr[rhs];
    }

    for (int pos = left; pos <= right; ++pos) {
        arr[pos] = temp[pos];
    }
    return inversions;
}
// Count the inversions inside arr[left..right] with a merge sort that uses
// temp as scratch space. The range is left sorted as a side effect.
int countInversions(int arr[], int temp[], int left, int right) {
    // A range with fewer than two elements contains no inversion.
    if (left >= right) {
        return 0;
    }

    const int middle = left + (right - left) / 2;
    const int inLeftHalf = countInversions(arr, temp, left, middle);
    const int inRightHalf = countInversions(arr, temp, middle + 1, right);
    const int acrossHalves = mergeAndCount(arr, temp, left, middle, right);

    return inLeftHalf + inRightHalf + acrossHalves;
}
11.3 算法分析题
- 给定1TB数据,如何用归并排序进行排序?
  答:使用外部排序,分割成小块,内存排序后多路归并
- 什么情况下归并排序比快速排序更优?
  答:需要稳定排序、链表排序、最坏情况性能要求高时
- 如何优化归并排序的空间复杂度?
  答:使用原地归并(复杂)、避免递归、小数组用插入排序
11.4 进阶思考题
// Exercise: implement three-way merge sort.
//
// Merge three adjacent sorted runs given as half-open ranges,
// arr[low..mid1), arr[mid1..mid2) and arr[mid2..high), through the scratch
// array temp, then copy the merged range back into arr[low..high).
// On equal values the earlier run is preferred (first, then second, then
// third).
void mergeThree(int arr[], int temp[], int low, int mid1, int mid2, int high) {
    int a = low;    // cursor in the first run
    int b = mid1;   // cursor in the second run
    int c = mid2;   // cursor in the third run
    int out = low;  // next slot to fill in temp

    for (;;) {
        const int hasA = a < mid1;
        const int hasB = b < mid2;
        const int hasC = c < high;
        if (!hasA && !hasB && !hasC) {
            break;
        }

        // An exhausted run never blocks a candidate; among the live runs the
        // smallest head is taken, earliest run first on ties.
        if (hasA && (!hasB || arr[a] <= arr[b]) && (!hasC || arr[a] <= arr[c])) {
            temp[out++] = arr[a++];
        } else if (hasB && (!hasC || arr[b] <= arr[c])) {
            temp[out++] = arr[b++];
        } else {
            temp[out++] = arr[c++];
        }
    }

    for (int pos = low; pos < high; pos++) {
        arr[pos] = temp[pos];
    }
}
十二、性能测试与比较
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
// Time recommendedMergeSort on 100000 pseudo-random ints in [0, 999] and
// print the elapsed processor time in seconds.
//
// If either working array cannot be allocated, a message is written to
// stderr and the function returns without running the measurement.
void performanceTest() {
    const int SIZE = 100000;
    int *arr1 = malloc((size_t)SIZE * sizeof *arr1);
    int *arr2 = malloc((size_t)SIZE * sizeof *arr2);

    // Both buffers are written in the fill loop below, so neither may be
    // NULL. free(NULL) is a no-op, which makes releasing both safe.
    if (arr1 == NULL || arr2 == NULL) {
        fprintf(stderr, "内存分配失败\n");
        free(arr1);
        free(arr2);
        return;
    }

    for (int i = 0; i < SIZE; i++) {
        arr1[i] = rand() % 1000;
        // arr2 receives an identical copy of the input; this function fills
        // it but does not read it afterwards.
        arr2[i] = arr1[i];
    }

    clock_t start = clock();
    recommendedMergeSort(arr1, SIZE);
    clock_t end = clock();

    printf("归并排序时间: %f秒\n", (double)(end - start) / CLOCKS_PER_SEC);

    free(arr1);
    free(arr2);
}
总结
归并排序以其稳定的O(nlogn)时间复杂度和稳定性在大数据排序中占据重要地位。虽然需要O(n)的额外空间,但在需要稳定排序、链表排序或外部排序场景下是不可替代的选择。掌握递归和非递归两种实现方式,理解各种优化策略,对于应对不同场景的排序需求具有重要意义。在实际应用中,应根据数据特性和系统资源选择合适的实现方案。