排序算法之快排与TIMSORT的比较测试,python

以下提供 Python、Go、C++ 三种语言的性能测试代码,统一测试两种算法在 随机数据、部分有序数据、完全有序数据 三种场景下的执行时间,数据规模覆盖 1k/10k/100k/1M 元素:

一、Python 性能测试代码

python 复制代码
  
import time
import random
from typing import Callable, List

# ---------------------- 待测试算法 ----------------------
# 1. 之前的快排变种(原逻辑复现)
def insertion_sort(arr: List[int]) -> List[int]:
    """Return a new ascending-sorted list produced by insertion sort.

    The caller's list is never modified; a copy is sorted and returned.
    """
    result = arr.copy()
    for pos in range(1, len(result)):
        current = result[pos]
        slot = pos
        # Slide larger elements one step right until `current` fits.
        while slot > 0 and result[slot - 1] > current:
            result[slot] = result[slot - 1]
            slot -= 1
        result[slot] = current
    return result

def quick_sort_simple(arr: List[int], threshold_ratio: float = 1/16) -> List[int]:
    """Sort with a list-building quicksort that delegates small parts to insertion sort.

    The cutoff is computed once from the length of the whole input
    (``len(arr) * threshold_ratio``, never below 1).  Any partition whose
    length is at or below that cutoff is handed to ``insertion_sort``.

    Args:
        arr: Values to sort; the list itself is not modified.
        threshold_ratio: Fraction of the input length used as the cutoff.

    Returns:
        A new list with the elements in ascending order.
    """
    if len(arr) <= 1:
        return arr.copy()
    cutoff = max(1, int(len(arr) * threshold_ratio))

    def _sort(part: List[int]) -> List[int]:
        if len(part) <= cutoff:
            return insertion_sort(part)
        pivot = part[len(part) // 2]
        smaller: List[int] = []
        equal: List[int] = []
        larger: List[int] = []
        # Three-way partition around the middle element.
        for value in part:
            if value < pivot:
                smaller.append(value)
            elif value == pivot:
                equal.append(value)
            elif value > pivot:
                larger.append(value)
        return _sort(smaller) + equal + _sort(larger)

    return _sort(arr.copy())

# 2. Timsort(前文实现)
import bisect
def timsort(arr: List[int]) -> List[int]:
    """Return a new ascending-sorted list using a simplified Timsort.

    The input is copied, cut into fixed chunks of ``MIN_RUN`` elements that
    are insertion-sorted in place, and then adjacent sorted chunks are merged
    bottom-up, with the chunk width doubling on every pass.  The merge step
    uses binary search so that whole stretches of one side are copied at once.

    Args:
        arr: Values to sort; the list itself is not modified.

    Returns:
        A new list containing the same elements in ascending order.
    """
    arr = arr.copy()
    n = len(arr)
    MIN_RUN = 32

    def insertion_sort_sub(arr: List[int], left: int, right: int) -> None:
        """Insertion-sort arr[left..right] (both ends inclusive) in place."""
        for i in range(left + 1, right + 1):
            key = arr[i]
            j = i - 1
            while j >= left and arr[j] > key:
                arr[j + 1] = arr[j]
                j -= 1
            arr[j + 1] = key

    # Phase 1: make every MIN_RUN-sized chunk individually sorted.
    for i in range(0, n, MIN_RUN):
        end = min(i + MIN_RUN - 1, n - 1)
        insertion_sort_sub(arr, i, end)

    def merge(a: List[int], b: List[int]) -> List[int]:
        """Merge two sorted lists into a new sorted list."""
        res: List[int] = []
        i = j = 0
        len_a, len_b = len(a), len(b)
        while i < len_a and j < len_b:
            if a[i] <= b[j]:
                # Search in place from index i (the `lo` argument) instead of
                # slicing a[i:], which would copy the remainder on every step
                # and make the merge quadratic.
                stop = bisect.bisect_right(a, b[j], i)
                res.extend(a[i:stop])
                i = stop
            else:
                stop = bisect.bisect_right(b, a[i], j)
                res.extend(b[j:stop])
                j = stop
        res.extend(a[i:])
        res.extend(b[j:])
        return res

    # Phase 2: merge neighbouring sorted chunks, doubling the width each pass.
    size = MIN_RUN
    while size < n:
        for left in range(0, n, 2 * size):
            mid = min(left + size - 1, n - 1)
            right = min(left + 2 * size - 1, n - 1)
            if mid < right:  # a right-hand chunk exists, so there is work to do
                merged = merge(arr[left:mid + 1], arr[mid + 1:right + 1])
                arr[left:left + len(merged)] = merged
        size *= 2
    return arr

# ---------------------- 性能测试工具 ----------------------
def generate_test_data(size: int, data_type: str) -> List[int]:
    """Build a reproducible benchmark input holding the integers 0..size-1.

    Args:
        size: Number of elements to generate.
        data_type: ``"random"`` for a full shuffle; ``"partial_sorted"`` for an
            ascending list disturbed by ``size // 5`` random pair swaps; any
            other value (including ``"sorted"``) yields the ascending list.

    Returns:
        A new list; the same arguments always produce the same data.
    """
    # A private generator with a fixed seed keeps the data reproducible
    # without resetting the state of the shared module-level generator.
    rng = random.Random(42)
    arr = list(range(size))
    if data_type == "random":
        rng.shuffle(arr)
    elif data_type == "partial_sorted":
        # size // 5 swaps of two random positions; each swap can displace two
        # elements, so at most about 40% of the positions are disturbed.
        for _ in range(size // 5):
            idx1, idx2 = rng.randint(0, size - 1), rng.randint(0, size - 1)
            arr[idx1], arr[idx2] = arr[idx2], arr[idx1]
    return arr

def test_algorithm(func: Callable[[List[int]], List[int]], data: List[int], name: str) -> float:
    """测试算法执行时间,返回耗时(秒)"""
    start = time.perf_counter()
    func(data)
    end = time.perf_counter()
   耗时 = end - start
    print(f"{name:20} 耗时: {耗时:.6f} 秒")
    return 耗时

# ---------------------- 执行测试 ----------------------
if __name__ == "__main__":
    # Benchmark matrix: every algorithm runs on every (size, data kind) pair.
    sizes = [1000, 10000, 100000, 1000000]
    data_kinds = ["random", "partial_sorted", "sorted"]
    algorithms = [("快排变种", quick_sort_simple), ("Timsort", timsort)]

    for size in sizes:
        print(f"\n===== 数据规模: {size} 元素 =====")
        for kind in data_kinds:
            print(f"\n【{kind} 数据】")
            # One dataset per (size, kind), shared by all algorithms.
            dataset = generate_test_data(size, kind)
            for label, sorter in algorithms:
                test_algorithm(sorter, dataset, label)
 

二、Go 性能测试代码

go 复制代码
  
package main

import (
	"bufio"
	"fmt"
	"math/rand"
	"os"
	"sort"
	"time"
)

// ---------------------- 待测试算法 ----------------------
// 1. 之前的快排变种(原逻辑复现)
// insertionSort returns an ascending-sorted copy of arr; the input slice is
// left untouched.
func insertionSort(arr []int) []int {
	sorted := append([]int(nil), arr...)
	for i := 1; i < len(sorted); i++ {
		cur := sorted[i]
		pos := i
		// Slide larger elements one slot right until cur's position is found.
		for ; pos > 0 && sorted[pos-1] > cur; pos-- {
			sorted[pos] = sorted[pos-1]
		}
		sorted[pos] = cur
	}
	return sorted
}

// QuickSortSimple returns a sorted copy of arr using a slice-building
// quicksort. The cutoff is derived once from the full input length
// (1/16 of it, at least 1); partitions at or below the cutoff are sorted
// with insertionSort.
func QuickSortSimple(arr []int) []int {
	if len(arr) <= 1 {
		return append([]int(nil), arr...)
	}
	const ratio = 1.0 / 16.0
	threshold := max(1, int(float64(len(arr))*ratio))

	var sortPart func([]int) []int
	sortPart = func(part []int) []int {
		if len(part) <= threshold {
			return insertionSort(part)
		}
		pivot := part[len(part)/2]
		var smaller, equal, larger []int
		// Three-way partition around the middle element.
		for _, v := range part {
			if v < pivot {
				smaller = append(smaller, v)
			} else if v > pivot {
				larger = append(larger, v)
			} else {
				equal = append(equal, v)
			}
		}
		out := append(sortPart(smaller), equal...)
		return append(out, sortPart(larger)...)
	}

	return sortPart(append([]int(nil), arr...))
}

// max returns the larger of a and b.
func max(a, b int) int {
	if a <= b {
		return b
	}
	return a
}

// 2. Timsort (implementation from the earlier article)
// minRun is the chunk length that Timsort insertion-sorts before merging,
// and also the starting width of the merge passes.
const minRun = 32

// insertionSortSub sorts arr[left..right] (both ends inclusive) in place
// with insertion sort; elements outside that range are not touched.
func insertionSortSub(arr []int, left, right int) {
	for i := left + 1; i <= right; i++ {
		cur := arr[i]
		pos := i
		// Never move past `left`, the lower bound of the range.
		for ; pos > left && arr[pos-1] > cur; pos-- {
			arr[pos] = arr[pos-1]
		}
		arr[pos] = cur
	}
}

// merge combines two ascending slices into a new ascending slice. Binary
// search finds how many leading elements of one side are not greater than
// the head of the other side, so that whole stretch is appended at once.
func merge(a, b []int) []int {
	out := make([]int, 0, len(a)+len(b))
	ai, bi := 0, 0

	for ai < len(a) && bi < len(b) {
		if a[ai] > b[bi] {
			// Take every remaining element of b that is <= a's head.
			limit := a[ai]
			rest := b[bi:]
			n := sort.Search(len(rest), func(k int) bool { return rest[k] > limit })
			out = append(out, rest[:n]...)
			bi += n
			continue
		}
		// Take every remaining element of a that is <= b's head.
		limit := b[bi]
		rest := a[ai:]
		n := sort.Search(len(rest), func(k int) bool { return rest[k] > limit })
		out = append(out, rest[:n]...)
		ai += n
	}

	out = append(out, a[ai:]...)
	return append(out, b[bi:]...)
}

// Timsort returns a sorted copy of arr using a simplified Timsort: chunks of
// minRun elements are insertion-sorted in place, then neighbouring sorted
// chunks are merged bottom-up with the width doubling on every pass.
func Timsort(arr []int) []int {
	work := make([]int, len(arr))
	copy(work, arr)
	n := len(work)

	// Small inputs are handled by one insertion sort.
	if n <= minRun {
		insertionSortSub(work, 0, n-1)
		return work
	}

	for start := 0; start < n; start += minRun {
		end := start + minRun - 1
		if end > n-1 {
			end = n - 1
		}
		insertionSortSub(work, start, end)
	}

	for width := minRun; width < n; width *= 2 {
		// A merge is only needed while a right-hand chunk exists, i.e. while
		// the left chunk ends before the end of the slice.
		for left := 0; left+width < n; left += 2 * width {
			mid := left + width
			end := mid + width
			if end > n {
				end = n
			}
			merged := merge(work[left:mid], work[mid:end])
			copy(work[left:end], merged)
		}
	}

	return work
}

// ---------------------- 性能测试工具 ----------------------
// generateTestData builds a reproducible benchmark input holding the
// integers 0..size-1.
//
// dataType "random" shuffles the whole slice; "partial_sorted" applies
// size/5 random pair swaps to the ascending slice; any other value
// (including "sorted") returns the ascending slice unchanged.
func generateTestData(size int, dataType string) []int {
	// A private, fixed-seed generator replaces the deprecated rand.Seed and
	// leaves the package-level generator's state alone.
	rng := rand.New(rand.NewSource(42))

	arr := make([]int, size)
	for i := range arr {
		arr[i] = i
	}

	switch dataType {
	case "random":
		rng.Shuffle(size, func(i, j int) { arr[i], arr[j] = arr[j], arr[i] })
	case "partial_sorted":
		// Each swap can displace two elements, so at most about 40% of the
		// positions end up disturbed.
		for i := 0; i < size/5; i++ {
			idx1 := rng.Intn(size)
			idx2 := rng.Intn(size)
			arr[idx1], arr[idx2] = arr[idx2], arr[idx1]
		}
	}
	return arr
}

// testAlgorithm runs f(data) once, prints the elapsed time next to funcName,
// and returns that time in seconds. The result of f is discarded.
func testAlgorithm(funcName string, f func([]int) []int, data []int) float64 {
	begin := time.Now()
	f(data)
	// Measure before printing so the output cost is not included.
	seconds := time.Since(begin).Seconds()
	fmt.Printf("%-20s 耗时: %.6f 秒\n", funcName, seconds)
	return seconds
}

// ---------------------- 执行测试 ----------------------
// main runs every algorithm on every (size, data type) combination and
// prints one timing line per run, then waits for a key press.
func main() {
	type algorithm struct {
		name string
		fn   func([]int) []int
	}

	sizes := []int{1000, 10000, 100000, 1000000}
	kinds := []string{"random", "partial_sorted", "sorted"}
	algorithms := []algorithm{
		{name: "快排变种", fn: QuickSortSimple},
		{name: "Timsort", fn: Timsort},
	}

	for _, n := range sizes {
		fmt.Printf("\n===== 数据规模: %d 元素 =====\n", n)
		for _, kind := range kinds {
			fmt.Printf("\n【%s 数据】\n", kind)
			// One dataset per (size, kind), shared by all algorithms.
			dataset := generateTestData(n, kind)
			for _, a := range algorithms {
				testAlgorithm(a.name, a.fn, dataset)
			}
		}
	}

	// Keep the console window open until the user presses Enter (optional).
	fmt.Println("\n测试完成,按回车退出...")
	bufio.NewReader(os.Stdin).ReadByte()
}
 

三、C++ 性能测试代码

cpp 复制代码
  
#include <algorithm>
#include <chrono>
#include <functional>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <random>
#include <string>
#include <utility>
#include <vector>

using namespace std;
using namespace chrono;

// ---------------------- 待测试算法 ----------------------
// 1. 之前的快排变种(原逻辑复现)
// Insertion sort on a by-value copy: the caller's vector is untouched and the
// sorted copy is returned.
vector<int> insertionSort(vector<int> arr) {
    const int count = arr.size();
    for (int pos = 1; pos < count; ++pos) {
        const int current = arr[pos];
        int slot = pos;
        // Slide larger elements one step right until `current` fits.
        for (; slot > 0 && arr[slot - 1] > current; --slot) {
            arr[slot] = arr[slot - 1];
        }
        arr[slot] = current;
    }
    return arr;
}

// List-building quicksort that hands small partitions to insertionSort.
// The cutoff is fixed once from the size of the whole input
// (size * thresholdRatio, never below 1). Returns the sorted vector.
vector<int> quickSortSimple(vector<int> arr, float thresholdRatio = 1.0/16.0) {
    if (arr.size() <= 1) return arr;
    const size_t cutoff =
        static_cast<size_t>(max(1, static_cast<int>(arr.size() * thresholdRatio)));

    function<vector<int>(vector<int>)> sortPart = [&](vector<int> part) -> vector<int> {
        if (part.size() <= cutoff) {
            return insertionSort(part);
        }
        const int pivot = part[part.size() / 2];
        vector<int> smaller, equal, larger;
        // Three-way partition around the middle element.
        for (int value : part) {
            if (value > pivot) larger.push_back(value);
            else if (value < pivot) smaller.push_back(value);
            else equal.push_back(value);
        }
        vector<int> sorted = sortPart(smaller);
        sorted.insert(sorted.end(), equal.begin(), equal.end());
        const vector<int> tail = sortPart(larger);
        sorted.insert(sorted.end(), tail.begin(), tail.end());
        return sorted;
    };

    return sortPart(arr);
}

// 2. Timsort (implementation from the earlier article)
// MIN_RUN is the chunk length that timsort insertion-sorts before merging,
// and also the starting width of the merge passes.
constexpr int MIN_RUN = 32;

// Sorts arr[left..right] (both ends inclusive) in place with insertion sort;
// elements outside that range are not touched.
void insertionSortSub(vector<int>& arr, int left, int right) {
    for (int pos = left + 1; pos <= right; ++pos) {
        const int current = arr[pos];
        int slot = pos;
        // Never move past `left`, the lower bound of the range.
        for (; slot > left && arr[slot - 1] > current; --slot) {
            arr[slot] = arr[slot - 1];
        }
        arr[slot] = current;
    }
}

// Merges two ascending vectors into a new ascending vector.
//
// The inputs are only read, so they are taken by const reference; this also
// lets callers pass const vectors and temporaries. Binary search
// (upper_bound) finds how many leading elements of one side are not greater
// than the head of the other side, so that whole stretch is copied at once.
vector<int> merge(const vector<int>& a, const vector<int>& b) {
    vector<int> res;
    res.reserve(a.size() + b.size());
    auto ia = a.begin();
    auto ib = b.begin();

    while (ia != a.end() && ib != b.end()) {
        if (*ia <= *ib) {
            // Everything in a up to and including values equal to *ib.
            auto stop = upper_bound(ia, a.end(), *ib);
            res.insert(res.end(), ia, stop);
            ia = stop;
        } else {
            // Everything in b up to and including values equal to *ia.
            auto stop = upper_bound(ib, b.end(), *ia);
            res.insert(res.end(), ib, stop);
            ib = stop;
        }
    }

    // At most one of the two tails is non-empty.
    res.insert(res.end(), ia, a.end());
    res.insert(res.end(), ib, b.end());
    return res;
}

// Simplified Timsort on a by-value copy: chunks of MIN_RUN elements are
// insertion-sorted in place, then neighbouring sorted chunks are merged
// bottom-up with the width doubling on every pass.
vector<int> timsort(vector<int> arr) {
    const int n = arr.size();

    // Small inputs are handled by one insertion sort.
    if (n <= MIN_RUN) {
        insertionSortSub(arr, 0, n - 1);
        return arr;
    }

    for (int start = 0; start < n; start += MIN_RUN) {
        insertionSortSub(arr, start, min(start + MIN_RUN, n) - 1);
    }

    for (int width = MIN_RUN; width < n; width *= 2) {
        // A merge is only needed while a right-hand chunk exists, i.e. while
        // the left chunk ends before the end of the vector.
        for (int left = 0; left + width < n; left += 2 * width) {
            const int mid = left + width;
            const int stop = min(mid + width, n);

            vector<int> a(arr.begin() + left, arr.begin() + mid);
            vector<int> b(arr.begin() + mid, arr.begin() + stop);
            const vector<int> merged = merge(a, b);
            copy(merged.begin(), merged.end(), arr.begin() + left);
        }
    }

    return arr;
}

// ---------------------- 性能测试工具 ----------------------
// Builds a reproducible benchmark input holding the integers 0..size-1.
// "random" shuffles the whole vector; "partial_sorted" applies size/5 random
// pair swaps to the ascending vector; any other value (including "sorted")
// returns the ascending vector unchanged.
vector<int> generateTestData(int size, const string& dataType) {
    vector<int> data(size);
    iota(data.begin(), data.end(), 0);  // ascending 0 .. size-1

    mt19937 engine(42);  // fixed seed
    if (dataType == "random") {
        shuffle(data.begin(), data.end(), engine);
        return data;
    }
    if (dataType != "partial_sorted") {
        return data;
    }

    // Swap two random positions size/5 times.
    const int swaps = size / 5;
    for (int n = 0; n < swaps; ++n) {
        uniform_int_distribution<> pick(0, size - 1);
        const int first = pick(engine);
        const int second = pick(engine);
        swap(data[first], data[second]);
    }
    return data;
}

// Runs func(data) once, prints the elapsed time next to funcName, and returns
// that time in seconds. The result of func is discarded.
double testAlgorithm(const string& funcName, function<vector<int>(vector<int>)> func, const vector<int>& data) {
    const auto begin = high_resolution_clock::now();
    func(data);
    // Stop the clock before printing so output cost is not measured.
    const duration<double> elapsed = high_resolution_clock::now() - begin;
    const double seconds = elapsed.count();
    cout << left << setw(20) << funcName << " 耗时: " << fixed << setprecision(6) << seconds << " 秒" << endl;
    return seconds;
}

// ---------------------- 执行测试 ----------------------
int main() {
    vector<int> dataSizes = {1000, 10000, 100000, 1000000};
    vector<string> dataTypes = {"random", "partial_sorted", "sorted"};
    vector<pair<string, function<vector<int>(vector<int>)>>> algorithms = {
        {"快排变种", quickSortSimple},
        {"Timsort", timsort}
    };

    for (int size : dataSizes) {
        cout << "\n===== 数据规模: " << size << " 元素 =====" << endl;
        for (const string& dtype : dataTypes) {
            cout << "\n【" << dtype << " 数据】" << endl;
            vector<int> data = generateTestData(size, dtype);
            for (auto& algo : algorithms) {
                testAlgorithm(algo.first, algo.second, data);
            }
        }
    }

    return 0;
}
 

测试说明

  1. 环境一致性:三种语言均使用固定随机种子(42),保证各语言自身的测试数据可复现;但各语言的随机数生成器实现不同,相同种子生成的序列并不相同,因此跨语言的数据并不完全一致,跨语言对比时应关注趋势而非绝对数值;
  2. 数据场景:覆盖真实场景中常见的三种数据类型,避免单一数据导致的偏差;
  3. 耗时统计:仅统计排序函数调用本身的耗时,排除数据生成、打印等额外开销(各实现为保护入参而在函数内部进行的数组拷贝也计入耗时);
  4. 编译/运行建议:
  • Python:直接运行(需Python 3.7+);
  • Go: go run main.go (需Go 1.16+);
  • C++:编译时启用优化( g++ -O2 test.cpp -o test ),否则1M元素测试可能较慢。

需要我帮你分析测试结果的预期趋势,或针对某个语言的实现进行性能优化(如减少拷贝、使用原地排序)吗?

相关推荐
适应规律6 小时前
LWGANet:两大核心模块:TGFI(减空间冗余)和 LWGA(减通道冗余)。
python·计算机视觉
胡萝卜3.07 小时前
构建安全的C++内存管理体系:从RAII到智能指针的完整解决方案
运维·开发语言·c++·人工智能·安全·智能指针·raii
拾光Ծ7 小时前
【优选算法】双指针算法:专题一
数据结构·c++·算法
Watermelo6177 小时前
【前端实战】从 try-catch 回调到链式调用:一种更优雅的 async/await 错误处理方案
前端·javascript·网络·vue.js·算法·vue·用户体验
MSTcheng.7 小时前
【C++】如何快速实现一棵支持key或key-value的二叉搜索树?关键技巧一文掌握!
开发语言·c++·算法·二叉搜索树
ByNotD0g7 小时前
Go 泛型 in 1.25
开发语言·后端·golang
自己的九又四分之三站台7 小时前
Make Me a Hanzi:开源汉字数据项目深度解析
python
野生风长7 小时前
从零开始的c语言:指针高级应用(下)(回调函数,qsort函数模拟实现, strlen和sizeof)
java·c语言·开发语言·c++·算法
Dingdangcat867 小时前
YOLO12-ADown改进算法:两轮车辆行驶环境中的多目标检测与识别_1
算法·目标检测·目标跟踪