STC32 MDU32硬件加速测试
- ✨本文相关的测试代码基于AI辅助生成。
- STC32 MDU硬件加速封装库下载地址:
https://www.stcai.com/khs

- 测试单片机:
STC32F12K54
- 测试主频率:52MHz
一、测试概述
本测试旨在对比STC32单片机MDU(Multiplier/Divider Unit)硬件加速与软件实现的性能差异,测试内容包括:
- 32位CRC32运算(移位 vs MDU)
- 16位乘法运算(软件移位累加 vs MDU)
- 16位除法运算(软件试商法 vs MDU)
二、硬件选择和配置
| 项目 | 说明 |
|---|---|
| MCU | STC32F12K54 |
| 时钟频率 | 52MHz |
| 编译器 | Keil C251 |
| MDU库 | STC32_MDU32_LARGE.LIB |

📝测试相关代码
c
// Timer0 counts captured by main() after each measured region:
// CRC32 (shift vs. MDU), 16-bit multiply (software vs. MDU) and
// 16-bit divide (software vs. MDU). Each is filled from GetTimer0Value().
u32 shift_time, mdu_time;
u32 mul_soft_time, mul_mdu_time;
u32 div_soft_time, div_mdu_time;
/*
 * Bitwise CRC-32 built only from shift and XOR operations.
 *
 * Parameters of the CRC: polynomial 0x04C11DB7, register preset to
 * 0xFFFFFFFF, message bits consumed MSB first, no final XOR.
 *
 * addr: message bytes (pointer declared with the `code` qualifier)
 * l:    number of bytes to process
 * Returns the CRC register after the last byte (0xFFFFFFFF when l is 0).
 */
u32 crc32_shift(unsigned char code * addr, u16 l)
{
    u32 reg = 0xFFFFFFFF;
    u32 top_bit;
    u32 cur_byte;
    u16 pos;
    u16 step;

    for (pos = 0; pos < l; pos++) {
        /* Fold the next message byte into the top 8 bits of the register. */
        cur_byte = addr[pos];
        reg ^= (cur_byte << 24);

        for (step = 0; step < 8; step++) {
            top_bit = reg >> 31;                 /* bit about to be shifted out */
            reg <<= 1;
            /* (0 - top_bit) is all ones when top_bit is 1, else zero, so the
               polynomial is XORed in only when a 1 was shifted out. */
            reg ^= (0 - top_bit) & 0x04C11DB7;
        }
    }

    return reg;
}
/*
 * Same CRC-32 as the shift-based variant, but every shift is written as a
 * multiplication or division by a power of two so that the arithmetic goes
 * through the multiply/divide path being benchmarked.
 *
 * Parameters of the CRC: polynomial 0x04C11DB7, register preset to
 * 0xFFFFFFFF, message bits consumed MSB first, no final XOR.
 *
 * addr: message bytes (pointer declared with the `code` qualifier)
 * l:    number of bytes to process
 * Returns the CRC register after the last byte (0xFFFFFFFF when l is 0).
 */
u32 crc32_mdu(unsigned char code * addr, u16 l)
{
    u32 reg = 0xFFFFFFFF;
    u32 top_bit;
    u32 cur_byte;
    u16 pos;
    u16 step;

    for (pos = 0; pos < l; pos++) {
        /* byte * 2^24 places the byte in the top 8 bits (same as << 24). */
        cur_byte = addr[pos];
        reg ^= (cur_byte * 0x01000000);

        for (step = 0; step < 8; step++) {
            top_bit = (reg / 0x80000000);        /* same as >> 31 */
            reg = reg * 2;                       /* same as << 1, wraps mod 2^32 */
            /* (0 - top_bit) is all ones when top_bit is 1, else zero. */
            reg ^= (0 - top_bit) & 0x04C11DB7;
        }
    }

    return reg;
}
/*
 * 16 x 16 -> 32 bit multiplication done in software by shift-and-add.
 *
 * a, b: the two 16-bit factors
 * Returns the full 32-bit product.
 *
 * The loop walks the bits of b from least to most significant and stops as
 * soon as no set bits remain, so the iteration count depends on b.
 */
u32 mul16_soft(u16 a, u16 b)
{
    u32 product = 0;
    u32 addend = a;   /* widened to 32 bits so the doubling below cannot overflow */

    for (; b > 0; b >>= 1) {
        if (b & 1) {
            product += addend;
        }
        addend <<= 1;
    }

    return product;
}
/*
 * 16 x 16 -> 32 bit multiplication using the C `*` operator, which is the
 * operation the benchmark compares against the shift-and-add version.
 *
 * a, b: the two 16-bit factors
 * Returns the full 32-bit product; a is widened first so the multiplication
 * is carried out in 32 bits.
 */
u32 mul16_mdu(u16 a, u16 b)
{
    const u32 product = (u32)a * b;

    return product;
}
/*
 * 16-bit unsigned division done in software (bit-by-bit trial subtraction).
 *
 * dividend:  value to divide
 * divisor:   value to divide by
 * remainder: out-parameter, always written before returning
 * Returns the quotient.
 *
 * Division by zero does not trap: the function returns 0 and stores the
 * dividend in *remainder, so dividend == quotient * divisor + *remainder
 * still holds and the caller never reads an unwritten remainder.
 */
u16 div16_soft(u16 dividend, u16 divisor, u16 *remainder)
{
    u16 quotient = 0;
    u16 temp = 0;   /* running partial remainder */
    u8 i;

    if (divisor == 0) {
        *remainder = dividend;
        return 0;
    }

    /* Process the dividend from its most significant bit downwards. */
    for (i = 0; i < 16; i++) {
        /* Bring the next dividend bit into the partial remainder. */
        temp = (temp << 1) | ((dividend >> (15 - i)) & 1);
        if (temp >= divisor) {
            temp -= divisor;
            /* Shift an unsigned 1: with a 16-bit int, `1 << 15` would shift
               into the sign bit of a signed value, which is undefined. */
            quotient |= (u16)((u16)1 << (15 - i));
        }
    }

    *remainder = temp;
    return quotient;
}
/*
 * 16-bit unsigned division using the C `/` and `%` operators, which is the
 * operation the benchmark compares against the trial-subtraction version.
 *
 * dividend:  value to divide
 * divisor:   value to divide by
 * remainder: out-parameter, always written before returning
 * Returns the quotient.
 *
 * Division by zero does not trap: the function returns 0 and stores the
 * dividend in *remainder, so dividend == quotient * divisor + *remainder
 * still holds and the caller never reads an unwritten remainder.
 */
u16 div16_mdu(u16 dividend, u16 divisor, u16 *remainder)
{
    if (divisor == 0) {
        *remainder = dividend;
        return 0;
    }

    *remainder = dividend % divisor;
    return dividend / divisor;
}
- 基于定时器0计数周期统计
c
/*
 * Put Timer0 into a known state: mode bits set, count cleared, timer stopped.
 * main() starts and stops it around each measured region via TR0.
 *
 * NOTE(review): no clock-divider setting is written here, so how many CPU
 * clocks one timer count represents depends on the chip's reset defaults --
 * confirm against the datasheet before reading the counts as CPU cycles.
 */
void Timer0_Init(void)
{
TMOD &= 0xF0;   /* clear the low nibble of TMOD, keep the high nibble */
TMOD |= 0x01;   /* low nibble = 1; presumably Timer0 16-bit mode -- confirm in datasheet */
TL0 = 0x00;     /* clear the low count byte */
TH0 = 0x00;     /* clear the high count byte */
TR0 = 0;        /* leave the timer stopped */
}
u32 GetTimer0Value(void)
{
return (u32)TH0 << 8 | TL0;
}
- 测试程序:main:
c
void main(void)
{
u16 i;
u32 result_shift, result_mdu;
u16 a = 0x1234, b = 0xABCD;
u16 dividend = 0xFFFF, divisor = 0x1234;
u16 remainder;
u32 mul_result_soft, mul_result_mdu;
u16 div_result_soft, div_result_mdu;
P_SW2 = 0x80;
P3M1 = 0x00;
P3M0 = 0x00;
UART1_config(2);
Timer0_Init();
EA = 1;
// ========== 32位CRC32测试 ==========
printf("=== STC32 MDU vs Shift Operation Test ===\r\n");
printf("Test Data: %s\r\n", test_data);
printf("Length: %bd bytes\r\n\r\n", sizeof(test_data)-1);
TR0 = 0;
TL0 = 0x00;
TH0 = 0x00;
TR0 = 1;
result_shift = crc32_shift(test_data, sizeof(test_data)-1);
TR0 = 0;
shift_time = GetTimer0Value();
TR0 = 0;
TL0 = 0x00;
TH0 = 0x00;
TR0 = 1;
result_mdu = crc32_mdu(test_data, sizeof(test_data)-1);
TR0 = 0;
mdu_time = GetTimer0Value();
printf("Shift Operation:\r\n");
printf(" CRC32: 0x%08lX\r\n", result_shift);
printf(" Time: %lu cycles\r\n\r\n", shift_time);
printf("MDU Operation:\r\n");
printf(" CRC32: 0x%08lX\r\n", result_mdu);
printf(" Time: %lu cycles\r\n\r\n", mdu_time);
printf("Comparison:\r\n");
if(shift_time > mdu_time) {
printf(" MDU is %lu cycles faster\r\n", shift_time - mdu_time);
printf(" Speedup: %lu%%\r\n", (shift_time - mdu_time) * 100 / shift_time);
} else {
printf(" Shift is %lu cycles faster\r\n", mdu_time - shift_time);
printf(" Speedup: %lu%%\r\n", (mdu_time - shift_time) * 100 / mdu_time);
}
printf("\r\n");
// ========== 16位乘法测试 ==========
printf("=== 16-bit Multiplication Test ===\r\n");
printf("Test: 0x%04X * 0x%04X\r\n\r\n", a, b);
TR0 = 0;
TL0 = 0x00;
TH0 = 0x00;
TR0 = 1;
for(i = 0; i < 1000; i++) {
mul_result_soft = mul16_soft(a, b);
}
TR0 = 0;
mul_soft_time = GetTimer0Value();
TR0 = 0;
TL0 = 0x00;
TH0 = 0x00;
TR0 = 1;
for(i = 0; i < 1000; i++) {
mul_result_mdu = mul16_mdu(a, b);
}
TR0 = 0;
mul_mdu_time = GetTimer0Value();
printf("Software (Shift-Add):\r\n");
printf(" Result: 0x%08lX\r\n", mul_result_soft);
printf(" Time for 1000 ops: %lu cycles\r\n", mul_soft_time);
printf("MDU Hardware:\r\n");
printf(" Result: 0x%08lX\r\n", mul_result_mdu);
printf(" Time for 1000 ops: %lu cycles\r\n", mul_mdu_time);
if(mul_soft_time > mul_mdu_time) {
printf(" MDU is %lu cycles faster per 1000 ops\r\n", mul_soft_time - mul_mdu_time);
printf(" Speedup: %lu%%\r\n", (mul_soft_time - mul_mdu_time) * 100 / mul_soft_time);
}
printf("\r\n");
// ========== 16位除法测试 ==========
printf("=== 16-bit Division Test ===\r\n");
printf("Test: 0x%04X / 0x%04X\r\n\r\n", dividend, divisor);
TR0 = 0;
TL0 = 0x00;
TH0 = 0x00;
TR0 = 1;
for(i = 0; i < 1000; i++) {
div_result_soft = div16_soft(dividend, divisor, &remainder);
}
TR0 = 0;
div_soft_time = GetTimer0Value();
TR0 = 0;
TL0 = 0x00;
TH0 = 0x00;
TR0 = 1;
for(i = 0; i < 1000; i++) {
div_result_mdu = div16_mdu(dividend, divisor, &remainder);
}
TR0 = 0;
div_mdu_time = GetTimer0Value();
printf("Software (Trial Division):\r\n");
printf(" Result: 0x%04X, Remainder: 0x%04X\r\n", div_result_soft, remainder);
printf(" Time for 1000 ops: %lu cycles\r\n", div_soft_time);
printf("MDU Hardware:\r\n");
printf(" Result: 0x%04X, Remainder: 0x%04X\r\n", div_result_mdu, remainder);
printf(" Time for 1000 ops: %lu cycles\r\n", div_mdu_time);
if(div_soft_time > div_mdu_time) {
printf(" MDU is %lu cycles faster per 1000 ops\r\n", div_soft_time - div_mdu_time);
printf(" Speedup: %lu%%\r\n", (div_soft_time - div_mdu_time) * 100 / div_soft_time);
}
printf("\r\n");
// ========== 结果验证 ==========
printf("=== Result Verification ===\r\n");
printf("CRC32 Results Match: %s\r\n", result_shift == result_mdu ? "YES" : "NO");
printf("Mul Results Match: %s\r\n", mul_result_soft == mul_result_mdu ? "YES" : "NO");
printf("Div Results Match: %s\r\n", div_result_soft == div_result_mdu ? "YES" : "NO");
三、测试结果
- 测试打印的结果:
c
=== STC32 MDU vs Shift Operation Test ===
Test Data: Hello, STC32 MDU Test!
Length: 22 bytes
Shift Operation:
CRC32: 0x294EA8F9
Time: 6897 cycles
MDU Operation:
CRC32: 0x294EA8F9
Time: 1151 cycles
Comparison:
MDU is 5746 cycles faster
Speedup: 83%
=== 16-bit Multiplication Test ===
Test: 0x1234 * 0xABCD
Software (Shift-Add):
Result: 0x0C374FA4
Time for 1000 ops: 27083 cycles
MDU Hardware:
Result: 0x0C374FA4
Time for 1000 ops: 2167 cycles
MDU is 24916 cycles faster per 1000 ops
Speedup: 91%
=== 16-bit Division Test ===
Test: 0xFFFF / 0x1234
Software (Trial Division):
Result: 0x000E, Remainder: 0x0127
Time for 1000 ops: 56463 cycles
MDU Hardware:
Result: 0x000E, Remainder: 0x0127
Time for 1000 ops: 5167 cycles
MDU is 51296 cycles faster per 1000 ops
Speedup: 90%
=== Result Verification ===
CRC32 Results Match: YES
Mul Results Match: YES
Div Results Match: YES
3.1 CRC32运算测试
| 实现方式 | 结果 | 执行时间 |
|---|---|---|
| 移位操作 | 0x294EA8F9 | 6897 cycles |
| MDU运算 | 0x294EA8F9 | 1151 cycles |
性能对比:
- MDU比移位操作快 5746 cycles
- 加速比:83%(即执行时间减少 83%,约 6.0 倍加速)
3.2 16位乘法测试
测试用例:0x1234 * 0xABCD
| 实现方式 | 结果 | 1000次运算时间 |
|---|---|---|
| 软件移位累加 | 0x0C374FA4 | 27083 cycles |
| MDU硬件 | 0x0C374FA4 | 2167 cycles |
性能对比:
- MDU比软件实现快 24916 cycles
- 加速比:91%(即执行时间减少 91%,约 12.5 倍加速)
3.3 16位除法测试
测试用例:0xFFFF / 0x1234
| 实现方式 | 商 | 余数 | 1000次运算时间 |
|---|---|---|---|
| 软件试商法 | 0x000E | 0x0127 | 56463 cycles |
| MDU硬件 | 0x000E | 0x0127 | 5167 cycles |
性能对比:
- MDU比软件实现快 51296 cycles
- 加速比:90%(即执行时间减少 90%,约 10.9 倍加速)
四、结果验证
| 测试项目 | 结果匹配 |
|---|---|
| CRC32 | ✅ YES |
| 16位乘法 | ✅ YES |
| 16位除法 | ✅ YES |
五、性能总结
| 运算类型 | 软件时间 | MDU时间 | 加速比 |
|---|---|---|---|
| CRC32(32位移位) | 6897 cycles | 1151 cycles | 83% |
| 16位乘法 | 27083 cycles | 2167 cycles | 91% |
| 16位除法 | 56463 cycles | 5167 cycles | 90% |
六、结论
- MDU硬件加速效果显著 :所有测试中MDU的执行时间都比软件实现减少 83%-91%(约 6~12.5 倍加速)
- 除法运算节省的计数最多:软件试商法每次运算固定循环16轮,1000次运算共节省 51296 个计数(执行时间减少90%);按相对比例则乘法略高(91%)
- 16位运算同样高效 :MDU32对于16位乘除法运算同样能减少 90%左右 的执行时间(约 11~12.5 倍加速)
- 结果完全可靠:所有测试结果验证通过,MDU运算结果与软件实现完全一致
七、应用建议
在以下场景中推荐使用MDU硬件加速:
- 频繁进行乘除法运算的算法
- 实时信号处理和滤波
- CRC校验和数据完整性检查
- 需要高精度定时控制的应用