硬件fpu差异

一、汇编指令差异

1.1、不开fpu,O0优化等级,armcc编译器

cpp 复制代码
// Minimal floating-point test program: loops forever, performing one
// single-precision add, subtract, multiply and divide per iteration.
// The function never returns.
int main(void)
{
    while (1)
    {
		float a = 123.456f, b = 78.901f, res = 0.0f; // initial values are only a starting point, not fixed constants
		// Each statement overwrites res; the result is never read afterwards.
		res = a+b;
		res = a-b;
		res = a*b;
		res = a/b;
    }
}

对应的反汇编:

XML 复制代码
    main
        0x08000450:    e017        ..      B        0x8000482 ; main + 50
        0x08000452:    4c0c        .L      LDR      r4,[pc,#48] ; [0x8000484] = 0x42f6e979
        0x08000454:    4d0c        .M      LDR      r5,[pc,#48] ; [0x8000488] = 0x429dcd50
        0x08000456:    2600        .&      MOVS     r6,#0
        0x08000458:    4629        )F      MOV      r1,r5
        0x0800045a:    4620         F      MOV      r0,r4
        0x0800045c:    f000f823    ..#.    BL       __aeabi_fadd ; 0x80004a6
        0x08000460:    4606        .F      MOV      r6,r0
        0x08000462:    4629        )F      MOV      r1,r5
        0x08000464:    4620         F      MOV      r0,r4
        0x08000466:    f000f870    ..p.    BL       __aeabi_fsub ; 0x800054a
        0x0800046a:    4606        .F      MOV      r6,r0
        0x0800046c:    4629        )F      MOV      r1,r5
        0x0800046e:    4620         F      MOV      r0,r4
        0x08000470:    f000f871    ..q.    BL       __aeabi_fmul ; 0x8000556
        0x08000474:    4606        .F      MOV      r6,r0
        0x08000476:    4629        )F      MOV      r1,r5
        0x08000478:    4620         F      MOV      r0,r4
        0x0800047a:    f000f89e    ....    BL       __aeabi_fdiv ; 0x80005ba
        0x0800047e:    4606        .F      MOV      r6,r0
        0x08000480:    bf00        ..      NOP      
        0x08000482:    e7e6        ..      B        0x8000452 ; main + 2
  • 指令地址:0x08000450 开始是main函数内的代码段,Thumb-2 指令为 2 字节 / 4 字节(本段中的 4 字节指令都是 BL 函数调用;但 4 字节指令并不限于跳转 / 调用,下文库函数中的 UBFX、UMULL、EOR 等 32 位指令同样是 4 字节);
  • 寄存器:R0-R7 是 Cortex-M 通用低寄存器,函数调用遵循 ARM AAPCS 规则(浮点运算函数入参存在 R0/R1,返回值存在 R0);
  • 关键标识:__aeabi_fxxx 是 ARM软浮点运算库函数(软件模拟浮点运算,无硬件 FPU 参与)。
XML 复制代码
    .text
    __aeabi_fadd
        0x080004a6:    b4f0        ..      PUSH     {r4-r7}
        0x080004a8:    ea800201    ....    EOR      r2,r0,r1
        0x080004ac:    0fd4        ..      LSRS     r4,r2,#31
        0x080004ae:    0042        B.      LSLS     r2,r0,#1
        0x080004b0:    ebb20f41    ..A.    CMP      r2,r1,LSL #1
        0x080004b4:    d202        ..      BCS      0x80004bc ; __aeabi_fadd + 22
        0x080004b6:    4602        .F      MOV      r2,r0
        0x080004b8:    4608        .F      MOV      r0,r1
        0x080004ba:    4611        .F      MOV      r1,r2
        0x080004bc:    004a        J.      LSLS     r2,r1,#1
        0x080004be:    d042        B.      BEQ      0x8000546 ; __aeabi_fadd + 160
        0x080004c0:    0dc3        ..      LSRS     r3,r0,#23
        0x080004c2:    b2dd        ..      UXTB     r5,r3
        0x080004c4:    f3c152c7    ...R    UBFX     r2,r1,#23,#8
        0x080004c8:    1aad        ..      SUBS     r5,r5,r2
        0x080004ca:    2d20         -      CMP      r5,#0x20
        0x080004cc:    da35        5.      BGE      0x800053a ; __aeabi_fadd + 148
        0x080004ce:    f3c10116    ....    UBFX     r1,r1,#0,#23
        0x080004d2:    f4410200    A...    ORR      r2,r1,#0x800000
        0x080004d6:    b104        ..      CBZ      r4,0x80004da ; __aeabi_fadd + 52
        0x080004d8:    4252        RB      RSBS     r2,r2,#0
        0x080004da:    f1c50620    .. .    RSB      r6,r5,#0x20
        0x080004de:    fa02f106    ....    LSL      r1,r2,r6
        0x080004e2:    412a        *A      ASRS     r2,r2,r5
        0x080004e4:    4410        .D      ADD      r0,r0,r2
        0x080004e6:    ebb35fd0    ..._    CMP      r3,r0,LSR #23
        0x080004ea:    d023        #.      BEQ      0x8000534 ; __aeabi_fadd + 142
        0x080004ec:    b1c4        ..      CBZ      r4,0x8000520 ; __aeabi_fadd + 122
        0x080004ee:    2d01        .-      CMP      r5,#1
        0x080004f0:    eba050c3    ...P    SUB      r0,r0,r3,LSL #23
        0x080004f4:    dc09        ..      BGT      0x800050a ; __aeabi_fadd + 100
        0x080004f6:    bcf0        ..      POP      {r4-r7}
        0x080004f8:    f04f4200    O..B    MOV      r2,#0x80000000
        0x080004fc:    ea0252c3    ...R    AND      r2,r2,r3,LSL #23
        0x08000500:    b2db        ..      UXTB     r3,r3
        0x08000502:    f5000000    ....    ADD      r0,r0,#0x800000
        0x08000506:    f000b89f    ....    B.W      _float_epilogue ; 0x8000648
        0x0800050a:    0040        @.      LSLS     r0,r0,#1
        0x0800050c:    f1007080    ...p    ADD      r0,r0,#0x1000000
        0x08000510:    eb0050c3    ...P    ADD      r0,r0,r3,LSL #23
        0x08000514:    f1a07080    ...p    SUB      r0,r0,#0x1000000
        0x08000518:    ea4070d1    @..p    ORR      r0,r0,r1,LSR #31
        0x0800051c:    0049        I.      LSLS     r1,r1,#1
        0x0800051e:    e009        ..      B        0x8000534 ; __aeabi_fadd + 142
        0x08000520:    0849        I.      LSRS     r1,r1,#1
        0x08000522:    ea4171c0    A..q    ORR      r1,r1,r0,LSL #31
        0x08000526:    eba050c3    ...P    SUB      r0,r0,r3,LSL #23
        0x0800052a:    f5000000    ....    ADD      r0,r0,#0x800000
        0x0800052e:    0840        @.      LSRS     r0,r0,#1
        0x08000530:    eb0050c3    ...P    ADD      r0,r0,r3,LSL #23
        0x08000534:    bcf0        ..      POP      {r4-r7}
        0x08000536:    f000b87e    ..~.    B.W      __I$use$fp ; 0x8000636
        0x0800053a:    4261        aB      RSBS     r1,r4,#0
        0x0800053c:    2201        ."      MOVS     r2,#1
        0x0800053e:    eb020141    ..A.    ADD      r1,r2,r1,LSL #1
        0x08000542:    1b00        ..      SUBS     r0,r0,r4
        0x08000544:    e7f6        ..      B        0x8000534 ; __aeabi_fadd + 142
        0x08000546:    bcf0        ..      POP      {r4-r7}
        0x08000548:    4770        pG      BX       lr
    __aeabi_fsub
        0x0800054a:    f0814100    ...A    EOR      r1,r1,#0x80000000
        0x0800054e:    e7aa        ..      B        __aeabi_fadd ; 0x80004a6
    __aeabi_frsub
        0x08000550:    f0804000    ...@    EOR      r0,r0,#0x80000000
        0x08000554:    e7a7        ..      B        __aeabi_fadd ; 0x80004a6
    .text
    __aeabi_fmul
        0x08000556:    ea800201    ....    EOR      r2,r0,r1
        0x0800055a:    b510        ..      PUSH     {r4,lr}
        0x0800055c:    f0024300    ...C    AND      r3,r2,#0x80000000
        0x08000560:    0040        @.      LSLS     r0,r0,#1
        0x08000562:    d022        ".      BEQ      0x80005aa ; __aeabi_fmul + 84
        0x08000564:    004a        J.      LSLS     r2,r1,#1
        0x08000566:    d01f        ..      BEQ      0x80005a8 ; __aeabi_fmul + 82
        0x08000568:    0e01        ..      LSRS     r1,r0,#24
        0x0800056a:    eb016112    ...a    ADD      r1,r1,r2,LSR #24
        0x0800056e:    f3c00056    ..V.    UBFX     r0,r0,#1,#23
        0x08000572:    f3c20256    ..V.    UBFX     r2,r2,#1,#23
        0x08000576:    f4400000    @...    ORR      r0,r0,#0x800000
        0x0800057a:    f4420200    B...    ORR      r2,r2,#0x800000
        0x0800057e:    fba02002    ...     UMULL    r2,r0,r0,r2
        0x08000582:    0400        ..      LSLS     r0,r0,#16
        0x08000584:    397f        .9      SUBS     r1,r1,#0x7f
        0x08000586:    0414        ..      LSLS     r4,r2,#16
        0x08000588:    d000        ..      BEQ      0x800058c ; __aeabi_fmul + 54
        0x0800058a:    1c40        @.      ADDS     r0,r0,#1
        0x0800058c:    ea504012    P..@    ORRS     r0,r0,r2,LSR #16
        0x08000590:    d401        ..      BMI      0x8000596 ; __aeabi_fmul + 64
        0x08000592:    0040        @.      LSLS     r0,r0,#1
        0x08000594:    1e49        I.      SUBS     r1,r1,#1
        0x08000596:    b2c2        ..      UXTB     r2,r0
        0x08000598:    060c        ..      LSLS     r4,r1,#24
        0x0800059a:    eb0410d0    ....    ADD      r0,r4,r0,LSR #7
        0x0800059e:    1c40        @.      ADDS     r0,r0,#1
        0x080005a0:    0840        @.      LSRS     r0,r0,#1
        0x080005a2:    2a80        .*      CMP      r2,#0x80
        0x080005a4:    d002        ..      BEQ      0x80005ac ; __aeabi_fmul + 86
        0x080005a6:    e003        ..      B        0x80005b0 ; __aeabi_fmul + 90
        0x080005a8:    2000        .       MOVS     r0,#0
        0x080005aa:    bd10        ..      POP      {r4,pc}
        0x080005ac:    f0200001     ...    BIC      r0,r0,#1
        0x080005b0:    2900        .)      CMP      r1,#0
        0x080005b2:    da00        ..      BGE      0x80005b6 ; __aeabi_fmul + 96
        0x080005b4:    2000        .       MOVS     r0,#0
        0x080005b6:    4318        .C      ORRS     r0,r0,r3
        0x080005b8:    bd10        ..      POP      {r4,pc}
    .text
    __aeabi_fdiv
        0x080005ba:    b430        0.      PUSH     {r4,r5}
        0x080005bc:    ea800201    ....    EOR      r2,r0,r1
        0x080005c0:    f0024500    ...E    AND      r5,r2,#0x80000000
        0x080005c4:    f0304200    0..B    BICS     r2,r0,#0x80000000
        0x080005c8:    f0214000    !..@    BIC      r0,r1,#0x80000000
        0x080005cc:    d013        ..      BEQ      0x80005f6 ; __aeabi_fdiv + 60
        0x080005ce:    b190        ..      CBZ      r0,0x80005f6 ; __aeabi_fdiv + 60
        0x080005d0:    0dc3        ..      LSRS     r3,r0,#23
        0x080005d2:    0dd4        ..      LSRS     r4,r2,#23
        0x080005d4:    f3c20116    ....    UBFX     r1,r2,#0,#23
        0x080005d8:    f3c00016    ....    UBFX     r0,r0,#0,#23
        0x080005dc:    1ae4        ..      SUBS     r4,r4,r3
        0x080005de:    f4410100    A...    ORR      r1,r1,#0x800000
        0x080005e2:    f4400200    @...    ORR      r2,r0,#0x800000
        0x080005e6:    347d        }4      ADDS     r4,r4,#0x7d
        0x080005e8:    4291        .B      CMP      r1,r2
        0x080005ea:    d301        ..      BCC      0x80005f0 ; __aeabi_fdiv + 54
        0x080005ec:    1c64        d.      ADDS     r4,r4,#1
        0x080005ee:    e000        ..      B        0x80005f2 ; __aeabi_fdiv + 56
        0x080005f0:    0049        I.      LSLS     r1,r1,#1
        0x080005f2:    2c00        .,      CMP      r4,#0
        0x080005f4:    da02        ..      BGE      0x80005fc ; __aeabi_fdiv + 66
        0x080005f6:    bc30        0.      POP      {r4,r5}
        0x080005f8:    2000        .       MOVS     r0,#0
        0x080005fa:    4770        pG      BX       lr
        0x080005fc:    f44f0000    O...    MOV      r0,#0x800000
        0x08000600:    2300        .#      MOVS     r3,#0
        0x08000602:    4291        .B      CMP      r1,r2
        0x08000604:    d301        ..      BCC      0x800060a ; __aeabi_fdiv + 80
        0x08000606:    1a89        ..      SUBS     r1,r1,r2
        0x08000608:    4303        .C      ORRS     r3,r3,r0
        0x0800060a:    0840        @.      LSRS     r0,r0,#1
        0x0800060c:    ea4f0141    O.A.    LSL      r1,r1,#1
        0x08000610:    d1f7        ..      BNE      0x8000602 ; __aeabi_fdiv + 72
        0x08000612:    b151        Q.      CBZ      r1,0x800062a ; __aeabi_fdiv + 112
        0x08000614:    4291        .B      CMP      r1,r2
        0x08000616:    d102        ..      BNE      0x800061e ; __aeabi_fdiv + 100
        0x08000618:    f04f4100    O..A    MOV      r1,#0x80000000
        0x0800061c:    e005        ..      B        0x800062a ; __aeabi_fdiv + 112
        0x0800061e:    d202        ..      BCS      0x8000626 ; __aeabi_fdiv + 108
        0x08000620:    f04f0101    O...    MOV      r1,#1
        0x08000624:    e001        ..      B        0x800062a ; __aeabi_fdiv + 112
        0x08000626:    f06f0101    o...    MVN      r1,#1
        0x0800062a:    eb0350c4    ...P    ADD      r0,r3,r4,LSL #23
        0x0800062e:    4428        (D      ADD      r0,r0,r5
        0x08000630:    bc30        0.      POP      {r4,r5}
        0x08000632:    f3af8000    ....    NOP.W

1.2、打开fpu,O0优化等级,armcc编译器

XML 复制代码
    main
        0x0800045c:    e01c        ..      B        0x8000498 ; main + 60
        0x0800045e:    eddf1a0f    ....    VLDR     s3,[pc,#60] ; [0x800049c] = 0x42f6e979
        0x08000462:    eeb00a61    ..a.    VMOV.F32 s0,s3
        0x08000466:    eddf1a0e    ....    VLDR     s3,[pc,#56] ; [0x80004a0] = 0x429dcd50
        0x0800046a:    eef00a61    ..a.    VMOV.F32 s1,s3
        0x0800046e:    eddf1a0d    ....    VLDR     s3,[pc,#52] ; [0x80004a4] = 0
        0x08000472:    eeb01a61    ..a.    VMOV.F32 s2,s3
        0x08000476:    ee701a20    p. .    VADD.F32 s3,s0,s1
        0x0800047a:    eeb01a61    ..a.    VMOV.F32 s2,s3
        0x0800047e:    ee701a60    p.`.    VSUB.F32 s3,s0,s1
        0x08000482:    eeb01a61    ..a.    VMOV.F32 s2,s3
        0x08000486:    ee601a20    `. .    VMUL.F32 s3,s0,s1
        0x0800048a:    eeb01a61    ..a.    VMOV.F32 s2,s3
        0x0800048e:    eec01a20    .. .    VDIV.F32 s3,s0,s1
        0x08000492:    eeb01a61    ..a.    VMOV.F32 s2,s3
        0x08000496:    bf00        ..      NOP      
        0x08000498:    e7e1        ..      B        0x800045e ; main + 2
  1. 指令集 :依旧是 Cortex-M 的 Thumb-2 指令集,FPU 硬件指令为4 字节的 VFPv4 指令 (以e开头的 32 位机器码,如eddf1a0f),专门用于浮点运算;包括 FPU 加载指令(VLDR)、浮点寄存器间传送指令(VMOV.F32)、浮点运算指令(VADD/VSUB/VMUL/VDIV.F32),这些是硬件 FPU 的核心指令;
  2. FPU 寄存器 :使用 Cortex-M4/M7 的S0-S31 单精度浮点寄存器(FPU 专用,独立于通用寄存器 R0-R15),浮点运算全程在这些寄存器中执行,无通用寄存器参与;
  3. 指令后缀 :.F32表示单精度 float 运算(Cortex-M4 的 FPU 仅硬件支持单精度 float,double 仍会走软浮点;Cortex-M7 可选配双精度 FPU,配置了双精度 FPU 时 double 也可由硬件计算);
  4. 地址 / 循环 :0x0800045c 处的 B 指令先跳转到 0x08000498,0x08000498 处的 B 指令再跳回循环体起点 0x0800045e,形成无限死循环,持续执行 float 加减乘除硬件运算。

二、运行速度差异

ref:

https://www.cnblogs.com/hazy1k/p/19041870#%E7%AC%AC%E4%BA%8C%E5%8D%81%E4%B8%83%E7%AB%A0-fpu%E4%BB%8B%E7%BB%8D%E5%8F%8A%E5%BA%94%E7%94%A8

相关推荐
EleganceJiaBao1 年前
【ARM】CMSIS 软件标准接口
c语言·c++·嵌入式·arm·cortex·cmsis