一、汇编指令差异
1.1、不开fpu,O0优化等级,armcc编译器
cpp
/*
 * Floating-point arithmetic test program.
 *
 * Runs an endless loop; on every pass it declares three single-precision
 * locals and performs one add, one subtract, one multiply and one divide
 * on a and b, storing each result into res. The function never returns.
 */
int main(void)
{
while (1)
{
float a = 123.456f, b = 78.901f, res = 0.0f; // initial values are only a starting point, not fixed values
res = a+b; // single-precision addition
res = a-b; // single-precision subtraction (overwrites the previous result)
res = a*b; // single-precision multiplication
res = a/b; // single-precision division; res is never read afterwards
}
}
对应的反汇编:
XML
main
0x08000450: e017 .. B 0x8000482 ; main + 50
0x08000452: 4c0c .L LDR r4,[pc,#48] ; [0x8000484] = 0x42f6e979
0x08000454: 4d0c .M LDR r5,[pc,#48] ; [0x8000488] = 0x429dcd50
0x08000456: 2600 .& MOVS r6,#0
0x08000458: 4629 )F MOV r1,r5
0x0800045a: 4620 F MOV r0,r4
0x0800045c: f000f823 ..#. BL __aeabi_fadd ; 0x80004a6
0x08000460: 4606 .F MOV r6,r0
0x08000462: 4629 )F MOV r1,r5
0x08000464: 4620 F MOV r0,r4
0x08000466: f000f870 ..p. BL __aeabi_fsub ; 0x800054a
0x0800046a: 4606 .F MOV r6,r0
0x0800046c: 4629 )F MOV r1,r5
0x0800046e: 4620 F MOV r0,r4
0x08000470: f000f871 ..q. BL __aeabi_fmul ; 0x8000556
0x08000474: 4606 .F MOV r6,r0
0x08000476: 4629 )F MOV r1,r5
0x08000478: 4620 F MOV r0,r4
0x0800047a: f000f89e .... BL __aeabi_fdiv ; 0x80005ba
0x0800047e: 4606 .F MOV r6,r0
0x08000480: bf00 .. NOP
0x08000482: e7e6 .. B 0x8000452 ; main + 2
- 指令地址:0x08000450 开始是 main 函数的代码段;Thumb-2 指令长度为 2 字节或 4 字节混合编码(本段 main 中的 4 字节指令都是 BL 函数调用,如 f000f823;并非所有 4 字节指令都是跳转 / 调用)。
- 寄存器:R0-R7 是 Cortex-M 的通用低寄存器;函数调用遵循 ARM AAPCS 规则(软浮点下,浮点运算函数的入参通过 R0/R1 传递,返回值通过 R0 返回)。本例中 R4/R5 分别保存 a/b,R6 保存 res。
- 关键标识:__aeabi_fxxx 是 ARM 软浮点运算库函数(用软件模拟浮点运算,无硬件 FPU 参与)。
XML
.text
__aeabi_fadd
0x080004a6: b4f0 .. PUSH {r4-r7}
0x080004a8: ea800201 .... EOR r2,r0,r1
0x080004ac: 0fd4 .. LSRS r4,r2,#31
0x080004ae: 0042 B. LSLS r2,r0,#1
0x080004b0: ebb20f41 ..A. CMP r2,r1,LSL #1
0x080004b4: d202 .. BCS 0x80004bc ; __aeabi_fadd + 22
0x080004b6: 4602 .F MOV r2,r0
0x080004b8: 4608 .F MOV r0,r1
0x080004ba: 4611 .F MOV r1,r2
0x080004bc: 004a J. LSLS r2,r1,#1
0x080004be: d042 B. BEQ 0x8000546 ; __aeabi_fadd + 160
0x080004c0: 0dc3 .. LSRS r3,r0,#23
0x080004c2: b2dd .. UXTB r5,r3
0x080004c4: f3c152c7 ...R UBFX r2,r1,#23,#8
0x080004c8: 1aad .. SUBS r5,r5,r2
0x080004ca: 2d20 - CMP r5,#0x20
0x080004cc: da35 5. BGE 0x800053a ; __aeabi_fadd + 148
0x080004ce: f3c10116 .... UBFX r1,r1,#0,#23
0x080004d2: f4410200 A... ORR r2,r1,#0x800000
0x080004d6: b104 .. CBZ r4,0x80004da ; __aeabi_fadd + 52
0x080004d8: 4252 RB RSBS r2,r2,#0
0x080004da: f1c50620 .. . RSB r6,r5,#0x20
0x080004de: fa02f106 .... LSL r1,r2,r6
0x080004e2: 412a *A ASRS r2,r2,r5
0x080004e4: 4410 .D ADD r0,r0,r2
0x080004e6: ebb35fd0 ..._ CMP r3,r0,LSR #23
0x080004ea: d023 #. BEQ 0x8000534 ; __aeabi_fadd + 142
0x080004ec: b1c4 .. CBZ r4,0x8000520 ; __aeabi_fadd + 122
0x080004ee: 2d01 .- CMP r5,#1
0x080004f0: eba050c3 ...P SUB r0,r0,r3,LSL #23
0x080004f4: dc09 .. BGT 0x800050a ; __aeabi_fadd + 100
0x080004f6: bcf0 .. POP {r4-r7}
0x080004f8: f04f4200 O..B MOV r2,#0x80000000
0x080004fc: ea0252c3 ...R AND r2,r2,r3,LSL #23
0x08000500: b2db .. UXTB r3,r3
0x08000502: f5000000 .... ADD r0,r0,#0x800000
0x08000506: f000b89f .... B.W _float_epilogue ; 0x8000648
0x0800050a: 0040 @. LSLS r0,r0,#1
0x0800050c: f1007080 ...p ADD r0,r0,#0x1000000
0x08000510: eb0050c3 ...P ADD r0,r0,r3,LSL #23
0x08000514: f1a07080 ...p SUB r0,r0,#0x1000000
0x08000518: ea4070d1 @..p ORR r0,r0,r1,LSR #31
0x0800051c: 0049 I. LSLS r1,r1,#1
0x0800051e: e009 .. B 0x8000534 ; __aeabi_fadd + 142
0x08000520: 0849 I. LSRS r1,r1,#1
0x08000522: ea4171c0 A..q ORR r1,r1,r0,LSL #31
0x08000526: eba050c3 ...P SUB r0,r0,r3,LSL #23
0x0800052a: f5000000 .... ADD r0,r0,#0x800000
0x0800052e: 0840 @. LSRS r0,r0,#1
0x08000530: eb0050c3 ...P ADD r0,r0,r3,LSL #23
0x08000534: bcf0 .. POP {r4-r7}
0x08000536: f000b87e ..~. B.W __I$use$fp ; 0x8000636
0x0800053a: 4261 aB RSBS r1,r4,#0
0x0800053c: 2201 ." MOVS r2,#1
0x0800053e: eb020141 ..A. ADD r1,r2,r1,LSL #1
0x08000542: 1b00 .. SUBS r0,r0,r4
0x08000544: e7f6 .. B 0x8000534 ; __aeabi_fadd + 142
0x08000546: bcf0 .. POP {r4-r7}
0x08000548: 4770 pG BX lr
__aeabi_fsub
0x0800054a: f0814100 ...A EOR r1,r1,#0x80000000
0x0800054e: e7aa .. B __aeabi_fadd ; 0x80004a6
__aeabi_frsub
0x08000550: f0804000 ...@ EOR r0,r0,#0x80000000
0x08000554: e7a7 .. B __aeabi_fadd ; 0x80004a6
.text
__aeabi_fmul
0x08000556: ea800201 .... EOR r2,r0,r1
0x0800055a: b510 .. PUSH {r4,lr}
0x0800055c: f0024300 ...C AND r3,r2,#0x80000000
0x08000560: 0040 @. LSLS r0,r0,#1
0x08000562: d022 ". BEQ 0x80005aa ; __aeabi_fmul + 84
0x08000564: 004a J. LSLS r2,r1,#1
0x08000566: d01f .. BEQ 0x80005a8 ; __aeabi_fmul + 82
0x08000568: 0e01 .. LSRS r1,r0,#24
0x0800056a: eb016112 ...a ADD r1,r1,r2,LSR #24
0x0800056e: f3c00056 ..V. UBFX r0,r0,#1,#23
0x08000572: f3c20256 ..V. UBFX r2,r2,#1,#23
0x08000576: f4400000 @... ORR r0,r0,#0x800000
0x0800057a: f4420200 B... ORR r2,r2,#0x800000
0x0800057e: fba02002 ... UMULL r2,r0,r0,r2
0x08000582: 0400 .. LSLS r0,r0,#16
0x08000584: 397f .9 SUBS r1,r1,#0x7f
0x08000586: 0414 .. LSLS r4,r2,#16
0x08000588: d000 .. BEQ 0x800058c ; __aeabi_fmul + 54
0x0800058a: 1c40 @. ADDS r0,r0,#1
0x0800058c: ea504012 P..@ ORRS r0,r0,r2,LSR #16
0x08000590: d401 .. BMI 0x8000596 ; __aeabi_fmul + 64
0x08000592: 0040 @. LSLS r0,r0,#1
0x08000594: 1e49 I. SUBS r1,r1,#1
0x08000596: b2c2 .. UXTB r2,r0
0x08000598: 060c .. LSLS r4,r1,#24
0x0800059a: eb0410d0 .... ADD r0,r4,r0,LSR #7
0x0800059e: 1c40 @. ADDS r0,r0,#1
0x080005a0: 0840 @. LSRS r0,r0,#1
0x080005a2: 2a80 .* CMP r2,#0x80
0x080005a4: d002 .. BEQ 0x80005ac ; __aeabi_fmul + 86
0x080005a6: e003 .. B 0x80005b0 ; __aeabi_fmul + 90
0x080005a8: 2000 . MOVS r0,#0
0x080005aa: bd10 .. POP {r4,pc}
0x080005ac: f0200001 ... BIC r0,r0,#1
0x080005b0: 2900 .) CMP r1,#0
0x080005b2: da00 .. BGE 0x80005b6 ; __aeabi_fmul + 96
0x080005b4: 2000 . MOVS r0,#0
0x080005b6: 4318 .C ORRS r0,r0,r3
0x080005b8: bd10 .. POP {r4,pc}
.text
__aeabi_fdiv
0x080005ba: b430 0. PUSH {r4,r5}
0x080005bc: ea800201 .... EOR r2,r0,r1
0x080005c0: f0024500 ...E AND r5,r2,#0x80000000
0x080005c4: f0304200 0..B BICS r2,r0,#0x80000000
0x080005c8: f0214000 !..@ BIC r0,r1,#0x80000000
0x080005cc: d013 .. BEQ 0x80005f6 ; __aeabi_fdiv + 60
0x080005ce: b190 .. CBZ r0,0x80005f6 ; __aeabi_fdiv + 60
0x080005d0: 0dc3 .. LSRS r3,r0,#23
0x080005d2: 0dd4 .. LSRS r4,r2,#23
0x080005d4: f3c20116 .... UBFX r1,r2,#0,#23
0x080005d8: f3c00016 .... UBFX r0,r0,#0,#23
0x080005dc: 1ae4 .. SUBS r4,r4,r3
0x080005de: f4410100 A... ORR r1,r1,#0x800000
0x080005e2: f4400200 @... ORR r2,r0,#0x800000
0x080005e6: 347d }4 ADDS r4,r4,#0x7d
0x080005e8: 4291 .B CMP r1,r2
0x080005ea: d301 .. BCC 0x80005f0 ; __aeabi_fdiv + 54
0x080005ec: 1c64 d. ADDS r4,r4,#1
0x080005ee: e000 .. B 0x80005f2 ; __aeabi_fdiv + 56
0x080005f0: 0049 I. LSLS r1,r1,#1
0x080005f2: 2c00 ., CMP r4,#0
0x080005f4: da02 .. BGE 0x80005fc ; __aeabi_fdiv + 66
0x080005f6: bc30 0. POP {r4,r5}
0x080005f8: 2000 . MOVS r0,#0
0x080005fa: 4770 pG BX lr
0x080005fc: f44f0000 O... MOV r0,#0x800000
0x08000600: 2300 .# MOVS r3,#0
0x08000602: 4291 .B CMP r1,r2
0x08000604: d301 .. BCC 0x800060a ; __aeabi_fdiv + 80
0x08000606: 1a89 .. SUBS r1,r1,r2
0x08000608: 4303 .C ORRS r3,r3,r0
0x0800060a: 0840 @. LSRS r0,r0,#1
0x0800060c: ea4f0141 O.A. LSL r1,r1,#1
0x08000610: d1f7 .. BNE 0x8000602 ; __aeabi_fdiv + 72
0x08000612: b151 Q. CBZ r1,0x800062a ; __aeabi_fdiv + 112
0x08000614: 4291 .B CMP r1,r2
0x08000616: d102 .. BNE 0x800061e ; __aeabi_fdiv + 100
0x08000618: f04f4100 O..A MOV r1,#0x80000000
0x0800061c: e005 .. B 0x800062a ; __aeabi_fdiv + 112
0x0800061e: d202 .. BCS 0x8000626 ; __aeabi_fdiv + 108
0x08000620: f04f0101 O... MOV r1,#1
0x08000624: e001 .. B 0x800062a ; __aeabi_fdiv + 112
0x08000626: f06f0101 o... MVN r1,#1
0x0800062a: eb0350c4 ...P ADD r0,r3,r4,LSL #23
0x0800062e: 4428 (D ADD r0,r0,r5
0x08000630: bc30 0. POP {r4,r5}
0x08000632: f3af8000 .... NOP.W
1.2、打开fpu,O0优化等级,armcc编译器

XML
main
0x0800045c: e01c .. B 0x8000498 ; main + 60
0x0800045e: eddf1a0f .... VLDR s3,[pc,#60] ; [0x800049c] = 0x42f6e979
0x08000462: eeb00a61 ..a. VMOV.F32 s0,s3
0x08000466: eddf1a0e .... VLDR s3,[pc,#56] ; [0x80004a0] = 0x429dcd50
0x0800046a: eef00a61 ..a. VMOV.F32 s1,s3
0x0800046e: eddf1a0d .... VLDR s3,[pc,#52] ; [0x80004a4] = 0
0x08000472: eeb01a61 ..a. VMOV.F32 s2,s3
0x08000476: ee701a20 p. . VADD.F32 s3,s0,s1
0x0800047a: eeb01a61 ..a. VMOV.F32 s2,s3
0x0800047e: ee701a60 p.`. VSUB.F32 s3,s0,s1
0x08000482: eeb01a61 ..a. VMOV.F32 s2,s3
0x08000486: ee601a20 `. . VMUL.F32 s3,s0,s1
0x0800048a: eeb01a61 ..a. VMOV.F32 s2,s3
0x0800048e: eec01a20 .. . VDIV.F32 s3,s0,s1
0x08000492: eeb01a61 ..a. VMOV.F32 s2,s3
0x08000496: bf00 .. NOP
0x08000498: e7e1 .. B 0x800045e ; main + 2
- 指令集:依旧是 Cortex-M 的 Thumb-2 指令集;FPU 硬件指令为 4 字节(32 位)编码,属于 VFPv4 的单精度子集(Cortex-M4 上称为 FPv4-SP),机器码以 e 开头(如 eddf1a0f),专门用于浮点运算。其中 VLDR 是浮点加载指令,VMOV.F32 是浮点寄存器之间的传送指令(O0 下用于在寄存器间搬运变量),VADD/VSUB/VMUL/VDIV.F32 是浮点运算指令,这些是硬件 FPU 的核心指令。
- FPU 寄存器:使用 Cortex-M4/M7 的 S0-S31 单精度浮点寄存器(FPU 专用,独立于通用寄存器 R0-R15),浮点运算全程在这些寄存器中执行,无通用寄存器参与。
- 指令后缀:.F32 表示单精度 float 运算(Cortex-M4 的 FPU 仅硬件支持 float,double 仍会走软浮点;Cortex-M7 可选配支持双精度的 FPU,是否支持取决于具体芯片)。
- 地址 / 循环:0x0800045c 处的 B 指令先跳转到 0x08000498,0x08000498 处的 B 指令再跳回 0x0800045e(循环体起点),形成无限死循环,持续执行 float 加减乘除硬件运算。
二、运行速度差异
ref: