STC32 MDU32硬件加速测试

STC32 MDU32硬件加速测试


  • ✨本文相关的测试代码基于AI辅助生成。
  • STC32 MDU硬件加速封装库下载地址:https://www.stcai.com/khs
  • 测试单片机:STC32F12K54
  • 测试主频率:52MHz

一、测试概述

本测试旨在对比STC32单片机MDU(Multiplier/Divider Unit)硬件加速与软件实现的性能差异,测试内容包括:

  • 32位CRC32运算(移位 vs MDU)
  • 16位乘法运算(软件移位累加 vs MDU)
  • 16位除法运算(软件试商法 vs MDU)

二、硬件选择和配置

项目 说明
MCU STC32F12K54
时钟频率 52MHz
编译器 Keil C251
MDU库 STC32_MDU32_LARGE.LIB
📝测试相关代码
c 复制代码
// Timer 0 readings taken after the two CRC32 runs (shift-based and multiply/divide-based).
u32 shift_time, mdu_time;
// Timer 0 readings taken after the two 1000-iteration 16-bit multiplication loops.
u32 mul_soft_time, mul_mdu_time;
// Timer 0 readings taken after the two 1000-iteration 16-bit division loops.
u32 div_soft_time, div_mdu_time;

// CRC32 computed one bit at a time using shift operations only.
// Polynomial 0x04C11DB7, initial value 0xFFFFFFFF, most significant bit first;
// no final XOR and no bit reflection are applied.
//   addr: pointer (qualified with the Keil `code` keyword) to the input bytes
//   l:    number of bytes to process
// Returns the resulting 32-bit register value.
u32 crc32_shift(unsigned char code * addr, u16 l)
{
    u32 reg = 0xFFFFFFFF;
    u32 top_bit;
    u32 cur_byte;
    u16 byte_idx = 0;
    u16 bits_left;

    while (byte_idx < l)
    {
        // Merge the next input byte into the top 8 bits of the register.
        cur_byte = addr[byte_idx];
        reg ^= (cur_byte << 24);

        for (bits_left = 8; bits_left != 0; bits_left--)
        {
            top_bit = reg >> 31;
            reg <<= 1;
            // (0 - top_bit) is all ones when the shifted-out bit was 1, otherwise 0,
            // so the polynomial is applied without a branch.
            reg ^= (0 - top_bit) & 0x04C11DB7;
        }
        byte_idx++;
    }
    return reg;
}

// CRC32 with the same parameters as the shift version (polynomial 0x04C11DB7,
// initial value 0xFFFFFFFF, MSB first, no final XOR), but every shift is written
// as a 32-bit multiplication or division so the multiply/divide path is exercised.
// NOTE(review): whether these operators are routed to the MDU hardware depends on
// the linked MDU32 library and compiler settings, which are not visible here.
//   addr: pointer (qualified with the Keil `code` keyword) to the input bytes
//   l:    number of bytes to process
// Returns the resulting 32-bit register value.
u32 crc32_mdu(unsigned char code * addr, u16 l)
{
    u32 reg = 0xFFFFFFFF;
    u32 top_bit;
    u32 cur_byte;
    u16 byte_idx = 0;
    u16 bits_left;

    while (byte_idx < l)
    {
        // Multiplying by 0x01000000 is the arithmetic form of "<< 24".
        cur_byte = addr[byte_idx];
        reg ^= (cur_byte * 0x01000000);

        for (bits_left = 8; bits_left != 0; bits_left--)
        {
            // Dividing by 0x80000000 extracts the top bit (same as ">> 31").
            top_bit = (reg / 0x80000000);
            // Multiplying by 2 is the arithmetic form of "<< 1".
            reg = reg * 2;
            reg ^= (0 - top_bit) & 0x04C11DB7;
        }
        byte_idx++;
    }
    return reg;
}

// Software 16-bit x 16-bit multiplication by shift-and-add.
// For every set bit of b, the correspondingly shifted copy of a is added
// to the running total. Returns the full 32-bit product.
u32 mul16_soft(u16 a, u16 b)
{
    u32 product = 0;
    u32 addend = a;  // widened to 32 bits so the left shifts cannot overflow

    for (; b != 0; b >>= 1, addend <<= 1) {
        if ((b & 1) != 0) {
            product += addend;
        }
    }
    return product;
}

// 16-bit x 16-bit multiplication using the native * operator on a 32-bit operand.
// NOTE(review): intended to be executed by the MDU hardware; the mapping is done
// by the toolchain/library and cannot be confirmed from this listing.
// Returns the full 32-bit product.
u32 mul16_mdu(u16 a, u16 b)
{
    u32 wide_a = a;

    return wide_a * b;
}

// Software 16-bit unsigned division by bitwise trial subtraction (restoring division).
//   dividend:  value to divide
//   divisor:   value to divide by
//   remainder: output; always written before returning
// Returns the quotient.
// Division by zero returns quotient 0 and stores the dividend in *remainder, so
// dividend == quotient * divisor + *remainder still holds and the caller never
// reads an unwritten remainder.
u16 div16_soft(u16 dividend, u16 divisor, u16 *remainder)
{
    u16 quotient = 0;
    u16 temp = 0;
    u8 i;

    if (divisor == 0) {
        *remainder = dividend;
        return 0;
    }

    for (i = 0; i < 16; i++) {
        // Bring down the next dividend bit, most significant first.
        temp = (temp << 1) | ((dividend >> (15 - i)) & 1);
        if (temp >= divisor) {
            temp -= divisor;
            // Unsigned constant: with a 16-bit int, 1 << 15 would shift into the sign bit.
            quotient |= (u16)(1u << (15 - i));
        }
    }
    *remainder = temp;
    return quotient;
}

// 16-bit unsigned division using the native / and % operators.
// NOTE(review): intended to be executed by the MDU hardware; the mapping is done
// by the toolchain/library and cannot be confirmed from this listing.
//   dividend:  value to divide
//   divisor:   value to divide by
//   remainder: output; always written before returning
// Returns the quotient.
// Division by zero returns quotient 0 and stores the dividend in *remainder, so
// dividend == quotient * divisor + *remainder still holds and the caller never
// reads an unwritten remainder.
u16 div16_mdu(u16 dividend, u16 divisor, u16 *remainder)
{
    if (divisor == 0) {
        *remainder = dividend;
        return 0;
    }
    *remainder = dividend % divisor;
    return dividend / divisor;
}
  • 基于定时器0计数周期统计
c 复制代码
// Prepare Timer 0 as the tick counter for the benchmarks: set its mode bits,
// zero the count registers and leave it stopped. main() starts and stops it
// around each measured section by writing TR0.
void Timer0_Init(void)
{
    TMOD &= 0xF0;   // clear the low nibble of TMOD (presumably the Timer 0 control bits; confirm in the datasheet)
    TMOD |= 0x01;   // set bit 0; NOTE(review): on 8051-style timers this selects 16-bit mode 1, confirm for STC32
    TL0 = 0x00;     // zero the low count byte
    TH0 = 0x00;     // zero the high count byte
    TR0 = 0;        // timer stays stopped until a measurement starts it
}

u32 GetTimer0Value(void)
{
    return (u32)TH0 << 8 | TL0;
}
  • 测试程序:main:
c 复制代码
// Benchmark entry point: times the CRC32, 16-bit multiplication and 16-bit division
// routines in their software and multiply/divide-based variants using Timer 0,
// prints the results over the serial port via printf, and checks that both variants agree.
//
// Each measurement follows the same pattern: stop Timer 0, zero TH0/TL0, start it,
// run the code under test, stop it, then read the count with GetTimer0Value().
// NOTE(review): the count is only 16 bits wide (TH0:TL0), so a section that runs
// longer than 65535 ticks would wrap without being detected.
void main(void)
{
    u16 i;
    u32 result_shift, result_mdu;
    u16 a = 0x1234, b = 0xABCD;
    u16 dividend = 0xFFFF, divisor = 0x1234;
    // One remainder per implementation: with a single shared variable the MDU run
    // overwrote the software remainder before it was printed, so the two printed
    // remainders could never differ.
    u16 remainder_soft = 0, remainder_mdu = 0;
    u32 mul_result_soft, mul_result_mdu;
    u16 div_result_soft, div_result_mdu;

    P_SW2 = 0x80;   // NOTE(review): presumably enables access to the extended SFRs; confirm in the datasheet
    P3M1 = 0x00;    // NOTE(review): P3 mode registers cleared; presumably quasi-bidirectional I/O, confirm
    P3M0 = 0x00;

    UART1_config(2);    // defined elsewhere; sets up the serial port used by printf
    Timer0_Init();
    EA = 1;

    // ========== 32-bit CRC32 test ==========
    printf("=== STC32 MDU vs Shift Operation Test ===\r\n");
    printf("Test Data: %s\r\n", test_data);
    // NOTE(review): %bd is the Keil byte-sized format modifier; confirm that the
    // sizeof expression is really passed as a single byte on this toolchain.
    printf("Length: %bd bytes\r\n\r\n", sizeof(test_data)-1);

    TR0 = 0;
    TL0 = 0x00;
    TH0 = 0x00;
    TR0 = 1;
    result_shift = crc32_shift(test_data, sizeof(test_data)-1);
    TR0 = 0;
    shift_time = GetTimer0Value();

    TR0 = 0;
    TL0 = 0x00;
    TH0 = 0x00;
    TR0 = 1;
    result_mdu = crc32_mdu(test_data, sizeof(test_data)-1);
    TR0 = 0;
    mdu_time = GetTimer0Value();

    printf("Shift Operation:\r\n");
    printf("  CRC32: 0x%08lX\r\n", result_shift);
    printf("  Time: %lu cycles\r\n\r\n", shift_time);

    printf("MDU Operation:\r\n");
    printf("  CRC32: 0x%08lX\r\n", result_mdu);
    printf("  Time: %lu cycles\r\n\r\n", mdu_time);

    // The percentage printed as "Speedup" is the time saved relative to the slower variant.
    printf("Comparison:\r\n");
    if(shift_time > mdu_time) {
        printf("  MDU is %lu cycles faster\r\n", shift_time - mdu_time);
        printf("  Speedup: %lu%%\r\n", (shift_time - mdu_time) * 100 / shift_time);
    } else {
        printf("  Shift is %lu cycles faster\r\n", mdu_time - shift_time);
        printf("  Speedup: %lu%%\r\n", (mdu_time - shift_time) * 100 / mdu_time);
    }
    printf("\r\n");

    // ========== 16-bit multiplication test ==========
    printf("=== 16-bit Multiplication Test ===\r\n");
    printf("Test: 0x%04X * 0x%04X\r\n\r\n", a, b);

    TR0 = 0;
    TL0 = 0x00;
    TH0 = 0x00;
    TR0 = 1;
    for(i = 0; i < 1000; i++) {
        mul_result_soft = mul16_soft(a, b);
    }
    TR0 = 0;
    mul_soft_time = GetTimer0Value();

    TR0 = 0;
    TL0 = 0x00;
    TH0 = 0x00;
    TR0 = 1;
    for(i = 0; i < 1000; i++) {
        mul_result_mdu = mul16_mdu(a, b);
    }
    TR0 = 0;
    mul_mdu_time = GetTimer0Value();

    printf("Software (Shift-Add):\r\n");
    printf("  Result: 0x%08lX\r\n", mul_result_soft);
    printf("  Time for 1000 ops: %lu cycles\r\n", mul_soft_time);

    printf("MDU Hardware:\r\n");
    printf("  Result: 0x%08lX\r\n", mul_result_mdu);
    printf("  Time for 1000 ops: %lu cycles\r\n", mul_mdu_time);

    if(mul_soft_time > mul_mdu_time) {
        printf("  MDU is %lu cycles faster per 1000 ops\r\n", mul_soft_time - mul_mdu_time);
        printf("  Speedup: %lu%%\r\n", (mul_soft_time - mul_mdu_time) * 100 / mul_soft_time);
    }
    printf("\r\n");

    // ========== 16-bit division test ==========
    printf("=== 16-bit Division Test ===\r\n");
    printf("Test: 0x%04X / 0x%04X\r\n\r\n", dividend, divisor);

    TR0 = 0;
    TL0 = 0x00;
    TH0 = 0x00;
    TR0 = 1;
    for(i = 0; i < 1000; i++) {
        div_result_soft = div16_soft(dividend, divisor, &remainder_soft);
    }
    TR0 = 0;
    div_soft_time = GetTimer0Value();

    TR0 = 0;
    TL0 = 0x00;
    TH0 = 0x00;
    TR0 = 1;
    for(i = 0; i < 1000; i++) {
        div_result_mdu = div16_mdu(dividend, divisor, &remainder_mdu);
    }
    TR0 = 0;
    div_mdu_time = GetTimer0Value();

    printf("Software (Trial Division):\r\n");
    printf("  Result: 0x%04X, Remainder: 0x%04X\r\n", div_result_soft, remainder_soft);
    printf("  Time for 1000 ops: %lu cycles\r\n", div_soft_time);

    printf("MDU Hardware:\r\n");
    printf("  Result: 0x%04X, Remainder: 0x%04X\r\n", div_result_mdu, remainder_mdu);
    printf("  Time for 1000 ops: %lu cycles\r\n", div_mdu_time);

    if(div_soft_time > div_mdu_time) {
        printf("  MDU is %lu cycles faster per 1000 ops\r\n", div_soft_time - div_mdu_time);
        printf("  Speedup: %lu%%\r\n", (div_soft_time - div_mdu_time) * 100 / div_soft_time);
    }
    printf("\r\n");

    // ========== Result verification ==========
    printf("=== Result Verification ===\r\n");
    printf("CRC32 Results Match: %s\r\n", result_shift == result_mdu ? "YES" : "NO");
    printf("Mul Results Match: %s\r\n", mul_result_soft == mul_result_mdu ? "YES" : "NO");
    // A division only matches when both the quotient and the remainder agree.
    printf("Div Results Match: %s\r\n",
           (div_result_soft == div_result_mdu && remainder_soft == remainder_mdu) ? "YES" : "NO");
    // NOTE(review): the listing ends here without the closing brace of main();
    // the tail of the function appears to be missing from the source.

三、测试结果

  • 测试打印的结果:
c 复制代码
=== STC32 MDU vs Shift Operation Test ===
Test Data: Hello, STC32 MDU Test!
Length: 22 bytes

Shift Operation:
  CRC32: 0x294EA8F9
  Time: 6897 cycles

MDU Operation:
  CRC32: 0x294EA8F9
  Time: 1151 cycles

Comparison:
  MDU is 5746 cycles faster
  Speedup: 83%

=== 16-bit Multiplication Test ===
Test: 0x1234 * 0xABCD

Software (Shift-Add):
  Result: 0x0C374FA4
  Time for 1000 ops: 27083 cycles
MDU Hardware:
  Result: 0x0C374FA4
  Time for 1000 ops: 2167 cycles
  MDU is 24916 cycles faster per 1000 ops
  Speedup: 91%

=== 16-bit Division Test ===
Test: 0xFFFF / 0x1234


[2026-05-07 21:15:10.550]# RECV ASCII/350 <<<
Software (Trial Division):
  Result: 0x000E, Remainder: 0x0127
  Time for 1000 ops: 56463 cycles
MDU Hardware:
  Result: 0x000E, Remainder: 0x0127
  Time for 1000 ops: 5167 cycles
  MDU is 51296 cycles faster per 1000 ops
  Speedup: 90%

=== Result Verification ===
CRC32 Results Match: YES
Mul Results Match: YES
Div Results Match: YES
3.1 CRC32运算测试
实现方式 结果 执行时间
移位操作 0x294EA8F9 6897 cycles
MDU运算 0x294EA8F9 1151 cycles

性能对比:

  • MDU比移位操作快 5746 cycles
  • 加速比:83%

3.2 16位乘法测试

测试用例:0x1234 * 0xABCD

实现方式 结果 1000次运算时间
软件移位累加 0x0C374FA4 27083 cycles
MDU硬件 0x0C374FA4 2167 cycles

性能对比:

  • MDU比软件实现快 24916 cycles
  • 加速比:91%

3.3 16位除法测试

测试用例:0xFFFF / 0x1234

实现方式 商 余数 1000次运算时间
软件试商法 0x000E 0x0127 56463 cycles
MDU硬件 0x000E 0x0127 5167 cycles

性能对比:

  • MDU比软件实现快 51296 cycles
  • 加速比:90%

四、结果验证

测试项目 结果匹配
CRC32 ✅ YES
16位乘法 ✅ YES
16位除法 ✅ YES

五、性能总结

运算类型 软件时间 MDU时间 加速比
CRC32(32位移位) 6897 cycles 1151 cycles 83%
16位乘法 27083 cycles 2167 cycles 91%
16位除法 56463 cycles 5167 cycles 90%

六、结论

  1. MDU硬件加速效果显著 :所有测试均显示MDU比软件实现快 83%-91%
  2. 除法运算节省的绝对时间最多:软件除法算法复杂度高,每1000次运算节省 51296 cycles(执行时间减少90%);按百分比计算,乘法的提升最高(91%)
  3. 16位运算同样高效 :MDU32对于16位乘除法运算同样提供 90%左右 的性能提升
  4. 结果完全可靠:所有测试结果验证通过,MDU运算结果与软件实现完全一致

七、应用建议

在以下场景中推荐使用MDU硬件加速:

  • 频繁进行乘除法运算的算法
  • 实时信号处理和滤波
  • CRC校验和数据完整性检查
  • 需要高精度定时控制的应用