Android NDK之 使用 arm-v7a 汇编实现两数之和

关键词: NDK armv7a WebRTC arm汇编 CMake

最近在适配对讲程序,编译 WebRTC 库的过程中,发现它为 ARM 平台定制了汇编程序,用来优化平方根倒数算法的速度。上次写汇编还是 8086 时代的事,借此机会初步尝试一下 Android 上的 ARM 汇编。

具体jni工程建立就不介绍了,Android Studio直接可以从模板创建

工程目录如下

ruby 复制代码
kryo@WSL1:/mnt/k/Android/NDK-Project/XXX/src/main$ tree 
.
├── AndroidManifest.xml
├── cpp
│   ├── asm
│   │   ├── CMakeLists.txt
│   │   ├── asm_defines.h
│   │   ├── asm_jni.cpp
│   │   ├── asm_jni.h
│   │   ├── tow_sum_armv7a.S
│   │   └── tow_sum_cpp.cpp
└── java
    └── com
        └── kryo
            ├── asm
            │   └── TowSumAsm.java
            └── ...

1、C++接口编写

asm_jni.h

cpp 复制代码
#ifndef TOW_SUM_AMS_TEST_H
#define TOW_SUM_AMS_TEST_H

#include <jni.h>

// Brute-force "two sum" entry points, exported with C linkage.
// Exactly one of the two is declared, depending on whether the build
// defines USE_ASM.
extern "C" {

#ifdef USE_ASM
// Assembly implementation. It deliberately takes five arguments
// (data_in, data_out, data_len, ret_len, target); ret_len is not read by
// the implementation.
int32_t tow_sum_asm(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t ret_len, int32_t target);
#else
// C++ implementation. On a match it writes the two indices to data_out[0]
// and data_out[1] and returns 0; otherwise it writes {0, 0} and returns -1.
int32_t tow_sum_cpp(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t target);
#endif

}  // extern "C"

#endif //TOW_SUM_AMS_TEST_H

这里分别用汇编和 C++ 各实现一个暴力搜索版本的两数之和接口。汇编版本特意多加了一个未使用的参数 ret_len,凑成 5 个参数,目的是演示函数调用约定:armv7a 上前 4 个参数通过寄存器 r0–r3 传递,从第 5 个参数开始通过栈传递。

2、汇编实现

写汇编时我习惯先参考C代码去推导

tow_sum_cpp.cpp

cpp 复制代码
#include "asm_jni.h"

/**
 * Brute-force two-sum: finds the first pair of indices i < j (scanning i
 * ascending, then j ascending) such that data_in[i] + data_in[j] == target.
 *
 * The addition is performed with 32-bit wraparound (the same result as the
 * ARM `add` instruction and Java `int` arithmetic), so an overflowing pair
 * is well defined instead of being undefined behaviour.
 *
 * @param data_in   input array of data_len elements
 * @param data_out  output array of at least two elements; receives {i, j}
 *                  on success and {0, 0} on failure
 * @param data_len  number of elements in data_in; values <= 1 never match
 * @param target    sum to look for
 * @return 0 when a pair was found, -1 otherwise
 */
extern "C" int32_t tow_sum_cpp(int32_t *data_in, int32_t *data_out, int32_t data_len,int32_t target) {
    const uint32_t wanted = static_cast<uint32_t>(target);
    for (int32_t i = 0; i < data_len; ++i) {
        const uint32_t first = static_cast<uint32_t>(data_in[i]);
        for (int32_t j = i + 1; j < data_len; ++j) {
            // Unsigned addition wraps modulo 2^32; signed overflow would be UB.
            if (first + static_cast<uint32_t>(data_in[j]) == wanted) {
                data_out[0] = i;
                data_out[1] = j;
                return 0;
            }
        }
    }
    data_out[0] = 0;
    data_out[1] = 0;
    return -1;
}

以下是具体汇编代码的实现,基本每行都给出了注释

tow_sum_armv7a.S

asm 复制代码
@ tow_sum_asm: brute-force two-sum over an int32 array (ARMv7-A).
@
@ C prototype (declared in asm_jni.h):
@   int32_t tow_sum_asm(int32_t* data_in, int32_t* data_out,
@                       int32_t data_len, int32_t ret_len, int32_t target)
@
@ Input (first four arguments in registers, the fifth on the stack):
@        int32_t* data_in  -> r0
@        int32_t* data_out -> r1 (two elements are written)
@        int32_t  data_len -> r2
@        int32_t  ret_len  -> r3 (unused, only there to force a stack argument)
@        int32_t  target   -> [sp] on entry
@ Output: r0 = 0 when a pair was found (data_out = {i, j}),
@         r0 = -1 otherwise (data_out = {0, 0})
@
@ r4: i-index
@ r5: j-index
@ r6: target
@ r7: data_in[i]
@ r8: data_in[j]
@ r9: scratch (byte offsets, sum, zero)


#include "asm_defines.h"

GLOBAL_FUNCTION tow_sum_asm
.align  4
DEFINE_FUNCTION tow_sum_asm
    push {r4-r11}           @ save callee-saved registers (8 registers = 32 bytes)

    ldr r6, [sp, #32]       @ the fifth argument now sits 8*4 bytes above sp

    mov r4, #0              @ i = 0


LOOP_1:
    add r5, r4, #1          @ j = i + 1
    cmp r5, r2              @ signed compare against data_len
    bge FAL                 @ no j left for this i: search failed (also covers data_len <= 1)
    lsl r9, r4, #2          @ byte offset of element i
    ldr r7, [r0, r9]        @ r7 = data_in[i], loaded once per outer iteration

LOOP_2:
    lsl r9, r5, #2          @ byte offset of element j
    ldr r8, [r0, r9]        @ r8 = data_in[j]
    add r9, r8, r7          @ sum of the two elements (wraps on overflow)
    cmp r9, r6              @ compare with target
    beq SUC                 @ match found
    add r5, r5, #1          @ j++
    cmp r5, r2              @ if j < data_len
    blt LOOP_2              @ then: try the next j
    add r4, r4, #1          @ else: i++
    b LOOP_1                @ and restart the inner scan

SUC:
    str r4, [r1]            @ data_out[0] = i
    str r5, [r1, #4]        @ data_out[1] = j
    mov r0, #0              @ return 0
    b END

FAL:
    mov r9, #0
    str r9, [r1]            @ data_out[0] = 0
    str r9, [r1, #4]        @ data_out[1] = 0
    mvn r0, #0              @ return -1 (falls through to END so r0 is not overwritten)

END:
    pop {r4-r11}            @ restore callee-saved registers
    bx  lr

3、JNI实现

asm_jni.cpp

cpp 复制代码
#include "asm_jni.h"
#include <android/log.h>

// Tag attached to every message this file writes to the Android log.
#define TAG "ASM_TEST"

// printf-style debug logging: forwards its arguments to __android_log_print
// at ANDROID_LOG_DEBUG priority under TAG.
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)

#ifdef __cplusplus
extern "C" {
#endif
/**
 * JNI entry point for com.kryo.asm.TowSumAsm#towsum(int[], int).
 *
 * Searches `data` for two elements whose sum equals `target` and returns a
 * new two-element int[] holding their indices. When no pair exists, or when
 * `data` is null or shorter than two elements, the returned array is {0, 0}.
 * Returns NULL only when the result array itself cannot be allocated (a Java
 * exception is then pending).
 */
JNIEXPORT jintArray JNICALL
    Java_com_kryo_asm_TowSumAsm_towsum(JNIEnv *env, jobject thiz, jintArray data, jint target) {

        // A freshly created Java int[] is zero-filled, so it already holds the
        // "not found" answer {0, 0} for every early return below.
        jintArray r_array = env->NewIntArray(2);
        if (r_array == nullptr) {
            return nullptr;
        }
        if (data == nullptr) {
            return r_array;
        }

        const jsize length = env->GetArrayLength(data);
        if (length < 2) {
            // Fewer than two elements can never form a pair; skip the native search.
            return r_array;
        }

        jint *elements_in = env->GetIntArrayElements(data, nullptr);
        if (elements_in == nullptr) {
            return r_array;
        }

        // The search writes into a local buffer that is copied into r_array afterwards.
        jint indices[2] = {0, 0};

    #ifdef USE_ASM
        LOGD("call tow_sum_asm !\n");
        tow_sum_asm(elements_in, indices, length, 2, target);
    #else
        LOGD("call tow_sum_cpp !\n");
        tow_sum_cpp(elements_in, indices, length, target);
    #endif

        // The input was only read, so release it without copying anything back.
        env->ReleaseIntArrayElements(data, elements_in, JNI_ABORT);
        env->SetIntArrayRegion(r_array, 0, 2, indices);

        return r_array;
    }
#ifdef __cplusplus
}
#endif

TowSumAsm.java

java 复制代码
/**
 * Java-side binding for the native two-sum search.
 */
public class TowSumAsm {
    /** Library name handed to System.loadLibrary when the class is initialized. */
    private static final String NATIVE_LIBRARY = "asm";

    static {
        System.loadLibrary(NATIVE_LIBRARY);
    }

    /**
     * Looks for two elements of {@code data} whose sum equals {@code target}.
     *
     * @param data   values to search
     * @param target sum to look for
     * @return the int[] produced by the native implementation; the native code
     *         fills it with the two matching indices, or with {0, 0} when no
     *         pair is found
     */
    public native int[] towsum(int[] data, int target);
}

最后贴一下从webrtc中copy来的asm_defines.h

c 复制代码
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Helper directives and macros for preprocessed assembly (.S) sources.
// Including this header emits assembler directives, so it is meant to be
// included from assembly files only.
#ifndef KRYO_INCLUDE_ASM_DEFINES_H_
#define KRYO_INCLUDE_ASM_DEFINES_H_

// On Linux/ELF targets, emit an empty .note.GNU-stack section
// (conventionally used to tell the linker that this object does not need an
// executable stack).
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

// Define the macros used in ARM assembly code, so that for Mac or iOS builds
// we add leading underscores for the function names.
//
//   GLOBAL_FUNCTION name  - declares `name` as a global symbol and marks it
//                           .private_extern (Apple) or .hidden (elsewhere)
//   DEFINE_FUNCTION name  - emits the label that starts the function; on
//                           Linux/ELF it also tags the symbol as %function
//   CALL_FUNCTION name    - emits `bl` to the function
//   GLOBAL_LABEL name     - same directives as GLOBAL_FUNCTION, for labels
#ifdef __APPLE__
.macro GLOBAL_FUNCTION name
.global _\name
.private_extern _\name
.endm
.macro DEFINE_FUNCTION name
_\name:
.endm
.macro CALL_FUNCTION name
bl _\name
.endm
.macro GLOBAL_LABEL name
.global _\name
.private_extern _\name
.endm
#else
.macro GLOBAL_FUNCTION name
.global \name
.hidden \name
.endm
.macro DEFINE_FUNCTION name
#if defined(__linux__) && defined(__ELF__)
.type \name,%function
#endif
\name:
.endm
.macro CALL_FUNCTION name
bl \name
.endm
.macro GLOBAL_LABEL name
.global \name
.hidden \name
.endm
#endif

// With Apple's clang compiler, for instructions ldrb, strh, etc.,
// the condition code is after the width specifier. Here we define
// only the ones that are actually used in the assembly files.
#if (defined __llvm__) && (defined __APPLE__)
.macro streqh reg1, reg2, num
strheq \reg1, \reg2, \num
.endm
#endif
// Switch to the .text section so that code following the #include is
// assembled there rather than in the note section selected above.
.text
#endif  // KRYO_INCLUDE_ASM_DEFINES_H_

4、CMakeLists.txt编写生成libasm.so

CMakeLists.txt

cmake 复制代码
cmake_minimum_required(VERSION 3.10.2)

project("asm")

# Enable assembly support so that .S sources can be compiled.
enable_language(ASM)

# Pick the two-sum implementation by target ABI. The variable is referenced by
# name (not as ${ANDROID_ABI}) so that an unset or empty value reaches the
# FATAL_ERROR branch instead of producing a malformed if() expression.
if(ANDROID_ABI STREQUAL "armeabi-v7a")
    # 32-bit ARM: hand-written assembly implementation.
    add_library(asm SHARED
            asm_jni.cpp
            tow_sum_armv7a.S)
    # USE_ASM selects the tow_sum_asm code path; scoped to this target only.
    target_compile_definitions(asm PRIVATE USE_ASM)
elseif(ANDROID_ABI STREQUAL "arm64-v8a")
    # 64-bit ARM: C++ implementation.
    add_library(asm SHARED
            asm_jni.cpp
            tow_sum_cpp.cpp)
else()
    message(FATAL_ERROR "Unsupported ABI: ${ANDROID_ABI}")
endif()

# The JNI wrapper logs through __android_log_print, which lives in liblog.
target_link_libraries(asm
        log)

5、运行测试

java 复制代码
// Ask the native library for two entries of the sample array that add up to 12
// and log the pair of indices it returns.
final int[] samples = {1, 3, 5, 7, 9};
final int[] result = new TowSumAsm().towsum(samples, 12);
Log.d(TAG, "result " + result[0] + " " + result[1]);
sql 复制代码
2024-04-05 10:24:29.269 19863-19863 ASM_TEST                com.kryo.demo                        D  call tow_sum_asm !
2024-04-05 10:24:29.269 19863-19863 JNI_Activity            com.kryo.demo                        D  result 1 4

Reference

相关推荐
安卓理事人3 小时前
安卓LinkedBlockingQueue消息队列
android
万能的小裴同学5 小时前
Android M3U8视频播放器
android·音视频
q***57745 小时前
MySql的慢查询(慢日志)
android·mysql·adb
JavaNoober5 小时前
Android 前台服务 "Bad Notification" 崩溃机制分析文档
android
城东米粉儿6 小时前
关于ObjectAnimator
android
zhangphil7 小时前
Android渲染线程Render Thread的RenderNode与DisplayList,引用Bitmap及Open GL纹理上传GPU
android
火柴就是我8 小时前
从头写一个自己的app
android·前端·flutter
lichong9519 小时前
XLog debug 开启打印日志,release 关闭打印日志
android·java·前端
用户69371750013849 小时前
14.Kotlin 类:类的形态(一):抽象类 (Abstract Class)
android·后端·kotlin
火柴就是我10 小时前
NekoBoxForAndroid 编译libcore.aar
android