Android二代抽取壳简易实现和踩坑记录

Android二代抽取壳简易实现和踩坑记录

参考资料

[1] dpt-shell
[2] Android函数抽取壳的实现
[3] dpt-shell抽取壳项目源码及其逆向分析

整体思路

在《Android一代整体壳简易实现和踩坑记录》的基础上,做了以下改动:

1、在向壳程序dex末尾追加源程序所有dex时,抽取方法代码到资源文件

2、在壳程序Application的attachBaseContext方法解压出Apk所有so并通过System.load()加载壳so

3、壳so的初始函数_init实现对execve、mmap以及LoadMethod的hook

源程序

源程序基本同一代整体壳中的源程序,没有特别需要说明的地方

壳程序

ShellApplication,相较于一代整体壳中的代码,增加了两个方法,而且结构非常像,只是解压出的目录不同

java 复制代码
/**
 * Extracts every arm64-v8a native library (lib/.../*.so) from the given APK
 * into the private library directory {@code privatelibspath}.
 *
 * A library is only written when its target file does not exist yet
 * ({@code createNewFile()} returns true); existing files are left untouched.
 *
 * @param apkabspath absolute path of the APK (zip) file to read
 * @throws RuntimeException wrapping any IOException raised while reading or writing
 */
private void extractsofiles(String apkabspath){
    // try-with-resources closes the zip stream on every path, including exceptions
    try (ZipInputStream zipInputStream = new ZipInputStream(new BufferedInputStream(new FileInputStream(apkabspath)))) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryname = entry.getName();
            // Only unpack arm64-v8a/lib*.so
            if (entryname.startsWith("lib/") && entryname.endsWith(".so") && entryname.contains("arm64-v8a")) {
                // Keep only the last path component (including its leading '/')
                File libfile = new File(privatelibspath + File.separator + entryname.substring(entryname.lastIndexOf('/')));
                if (libfile.createNewFile()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(libfile)) {
                        byte[] bytes = new byte[1024];
                        int length;
                        while ((length = zipInputStream.read(bytes)) != -1) {
                            // Write only the bytes actually read; writing the whole buffer
                            // would append stale data and corrupt the extracted library
                            fileOutputStream.write(bytes, 0, length);
                        }
                        fileOutputStream.flush();
                    }
                }
            }
            zipInputStream.closeEntry();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}

/**
 * Extracts the extracted-code asset (an entry under "assets" whose name contains
 * {@code extractedcodefile}) from the given APK into {@code privateodexpath}.
 *
 * The file is only written when it does not exist yet
 * ({@code createNewFile()} returns true).
 *
 * @param apkabspath absolute path of the APK (zip) file to read
 * @throws RuntimeException wrapping any IOException raised while reading or writing
 */
private void extractcodefile(String apkabspath) {
    // try-with-resources closes the zip stream on every path, including exceptions
    try (ZipInputStream zipInputStream = new ZipInputStream(new BufferedInputStream(new FileInputStream(apkabspath)))) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryname = entry.getName();
            // Only unpack assets/<extractedcodefile>
            if (entryname.startsWith("assets") && entryname.contains(extractedcodefile)) {
                // Keep only the last path component (including its leading '/')
                File codefile = new File(privateodexpath + File.separator + entryname.substring(entryname.lastIndexOf('/')));
                if (codefile.createNewFile()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(codefile)) {
                        byte[] bytes = new byte[1024];
                        int length;
                        while ((length = zipInputStream.read(bytes)) != -1) {
                            // Write only the bytes actually read; writing the whole buffer
                            // would pad the file with stale data and break the native parser
                            fileOutputStream.write(bytes, 0, length);
                        }
                        fileOutputStream.flush();
                    }
                }
            }
            zipInputStream.closeEntry();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}

壳so

hook_DefineClass是一开始参考的[1]的比较新的代码,是把hook点前移了

后来发现之前[2]中hook LoadMethod的思路更加简单,于是有了hook_LoadMethod

bhook的使用参考bhook,前两步必做

Dobby的使用参考CMakeLists.txt,静态导入(原Dobby项目编译不出来,说是少个头文件)

cpp 复制代码
#include <jni.h>
#include <string>
#include "android/log.h"
#include "sys/mman.h"
#include "bytehook.h"
#include <unistd.h>
#include "Dobby/include/dobby.h"
#include <elf.h>
#include <dlfcn.h>
#include "DexFile.h"
#include <map>
#include <fstream>
#include <stdlib.h>

// Device SDK/API level, used to pick version-specific library names and paths
int apiLevel;
// Forward declarations of the hook installers defined later in this file
void hook();
void hook_execve();
void hook_mmap();
void hook_DefineClass();
void hook_LoadMethod();

// Name of the extracted-code file; it lives in the same private directory as the source .dex
std::string extractedcodefile = "srcapkcode";
// Set to true once the extracted-code file has been parsed, so it is parsed only once
bool extractedcodefileflag = false;
// Extracted-code map: method sequence number -> extracted code object
std::map<uint32_t , CodeItem*> codemap;

// Receives the original DefineClass entry point when hook_DefineClass() installs
// DefineClassV22 (the variant used when apiLevel >= __ANDROID_API_L_MR1__).
static void* (*g_originDefineClassV22)(void* thiz,
                                       void* self,
                                       const char* descriptor,
                                       size_t hash,
                                       void* class_loader,
                                       const void* dex_file,
                                       const void* dex_class_def);

// Receives the original DefineClass entry point when hook_DefineClass() installs
// DefineClassV21 (the variant used on older API levels).
static void* (*g_originDefineClassV21)(void* thiz,
                                       const char* descriptor,
                                       void* class_loader,
                                       const void* dex_file,
                                       const void* dex_class_def);

// Receives the original LoadMethod entry point when hook_LoadMethod() installs LoadMethod.
static void (*g_originLoadMethod)(void* thiz,
                                  const DexFile* dex_file,
                                  ClassAccessor::Method* method,
                                  void* klass,
                                  void* dst);

// 初始函数,实现hook
// Library initializer: caches the device API level and then installs every hook.
extern "C" void _init() {
    // The hook helpers branch on apiLevel, so it must be set before hook() runs.
    apiLevel = android_get_device_api_level();
    hook();
}

// hook
// Installs all hooks: bytehook is initialised first, then execve, mmap and
// LoadMethod are hooked in that order. The DefineClass hook is intentionally disabled.
void hook() {
    bytehook_init(BYTEHOOK_MODE_AUTOMATIC, false);

    hook_execve();
    hook_mmap();
    // hook_DefineClass();
    hook_LoadMethod();
}

// Returns the name of the library whose libc imports are hooked:
// "libartbase.so" when apiLevel >= 29, otherwise "libart.so".
const char * getArtLibName() {
    return (apiLevel >= 29) ? "libartbase.so" : "libart.so";
}

// Replacement for execve: any attempt to launch a path containing "dex2oat" is
// rejected with EACCES (this disables dex2oat); everything else is forwarded to
// the previous execve in the hook chain.
int fake_execve(const char *pathname, char *const argv[], char *const envp[]) {
    BYTEHOOK_STACK_SCOPE();
    const bool launchesDex2oat = strstr(pathname, "dex2oat") != nullptr;
    if (!launchesDex2oat) {
        return BYTEHOOK_CALL_PREV(fake_execve, pathname, argv, envp);
    }
    errno = EACCES;
    return -1;
}

void hook_execve(){
    bytehook_stub_t stub = bytehook_hook_single(
            getArtLibName(),
            "libc.so",
            "execve",
            (void *) fake_execve,
            nullptr,
            nullptr);
    if (stub != nullptr) {
        __android_log_print(6,"p1umh0","hook execve done");
    }
}

// Replacement for mmap: readable mappings that were requested without write
// permission get PROT_WRITE added, so the mapped dex can later be patched in place.
//
// Forcing PROT_WRITE can make a request fail that would have succeeded with the
// caller's own protection (for example a shared mapping of a descriptor that was
// opened read-only). In that case the call is retried once with the original
// protection so the caller never sees a failure caused by this hook.
//
// Returns whatever the previous mmap in the hook chain returns.
void* fake_mmap(void * __addr, size_t __size, int __prot, int __flags, int __fd, off_t __offset){
    BYTEHOOK_STACK_SCOPE();
    int prot = __prot;
    const bool hasRead = (__prot & PROT_READ) == PROT_READ;
    const bool hasWrite = (__prot & PROT_WRITE) == PROT_WRITE;
    // Add write permission to read-only mappings
    if (hasRead && !hasWrite) {
        prot = prot | PROT_WRITE;
    }
    void * addr = BYTEHOOK_CALL_PREV(fake_mmap, __addr, __size, prot, __flags, __fd, __offset);
    if (addr == MAP_FAILED && prot != __prot) {
        // The widened protection was refused: fall back to what the caller asked for
        addr = BYTEHOOK_CALL_PREV(fake_mmap, __addr, __size, __prot, __flags, __fd, __offset);
    }
    return addr;
}

void hook_mmap(){
    bytehook_stub_t stub = bytehook_hook_single(
            getArtLibName(),
            "libc.so",
            "mmap",
            (void *) fake_mmap,
            nullptr,
            nullptr);
    if(stub != nullptr){
        __android_log_print(6,"p1umh0","hook mmap done");
    }
}

// Returns the on-device 64-bit path of libart.so for the current apiLevel:
// above 29, exactly 29, and below 29 each use a different directory.
const char * getArtLibPath() {
    if (apiLevel > 29) {
        return "/apex/com.android.art/lib64/libart.so";
    }
    if (apiLevel == 29) {
        return "/apex/com.android.runtime/lib64/libart.so";
    }
    return "/system/lib64/libart.so";
}

// Returns the on-device 64-bit path of libartbase.so: the com.android.runtime
// apex when apiLevel is exactly 29, the com.android.art apex otherwise.
const char * getArtBaseLibPath() {
    return (apiLevel == 29)
           ? "/apex/com.android.runtime/lib64/libartbase.so"
           : "/apex/com.android.art/lib64/libartbase.so";
}

/**
 * Searches the first string-table section (SHT_STRTAB) of a 64-bit ELF file for
 * the first entry that contains every given keyword as a substring.
 *
 * @param elf_file       path of the ELF file to scan
 * @param keyword_count  number of variadic keyword arguments that follow
 * @param ...            keyword_count arguments of type const char*
 * @return a heap-allocated, NUL-terminated copy of the matching entry, or nullptr
 *         when the file cannot be read, is malformed, or nothing matches. The copy
 *         is independent of the file buffer; callers may release it with free().
 *
 * The file handle and the whole-file buffer are released on every path, and all
 * header/section offsets are validated against the file size before use.
 */
const char* find_symbol_in_elf_file(const char *elf_file,int keyword_count,...) {
    FILE *elf_fp = fopen(elf_file, "rb");
    if (elf_fp == nullptr) {
        return nullptr;
    }
    // Determine the file size
    long end_pos = -1;
    if (fseek(elf_fp, 0L, SEEK_END) == 0) {
        end_pos = ftell(elf_fp);
    }
    if (end_pos <= 0 || fseek(elf_fp, 0L, SEEK_SET) != 0) {
        fclose(elf_fp);
        return nullptr;
    }
    const size_t lib_size = (size_t) end_pos;
    // One extra zeroed byte guarantees a terminator after the last byte of the file,
    // so string searches can never run past the buffer
    char *data = (char *) calloc(lib_size + 1, 1);
    if (data == nullptr) {
        fclose(elf_fp);
        return nullptr;
    }
    const size_t read_size = fread(data, 1, lib_size, elf_fp);
    fclose(elf_fp);

    char *result = nullptr;
    if (read_size == lib_size && lib_size >= sizeof(Elf64_Ehdr)) {
        // ELF header
        const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *) data;
        const size_t shdr_bytes = (size_t) ehdr->e_shnum * sizeof(Elf64_Shdr);
        // The section header table must lie entirely inside the file
        if (ehdr->e_shoff <= lib_size && shdr_bytes <= lib_size - ehdr->e_shoff) {
            const Elf64_Shdr *shdr = (const Elf64_Shdr *) (data + ehdr->e_shoff);
            for (int i = 0; i < ehdr->e_shnum; i++, shdr++) {
                if (shdr->sh_type != SHT_STRTAB) {
                    continue;
                }
                // Only the first string table is searched
                if (shdr->sh_offset <= lib_size && shdr->sh_size <= lib_size - shdr->sh_offset) {
                    const char *ptr = data + shdr->sh_offset;
                    const char *str_end = ptr + shdr->sh_size;
                    while (ptr < str_end && result == nullptr) {
                        const char *item_value = ptr;
                        // Length is bounded by the section end, not by a fixed cap
                        const size_t item_len = strnlen(item_value, (size_t) (str_end - ptr));
                        ptr += (item_len + 1);
                        if (item_len == 0) {
                            continue;
                        }
                        int match_count = 0;
                        va_list kw_list;
                        va_start(kw_list, keyword_count);
                        for (int n = 0; n < keyword_count; n++) {
                            const char *keyword = va_arg(kw_list, const char*);
                            if (keyword != nullptr && strstr(item_value, keyword)) {
                                match_count++;
                            }
                        }
                        va_end(kw_list);
                        if (match_count == keyword_count) {
                            // Copy the entry out so the file buffer can be freed
                            result = (char *) malloc(item_len + 1);
                            if (result != nullptr) {
                                memcpy(result, item_value, item_len);
                                result[item_len] = '\0';
                            }
                            break;
                        }
                    }
                }
                break;
            }
        }
    }
    free(data);
    return result;
}

// Path of the library that is searched for the DefineClass symbol (libart.so).
const char * getClassLinkerDefineClassLibPath() {
    const char *artPath = getArtLibPath();
    return artPath;
}

// Looks up the first string-table entry containing both "ClassLinker" and
// "DefineClass" in the DefineClass library; returns nullptr when none is found.
const char * getClassLinkerDefineClassSymbol() {
    return find_symbol_in_elf_file(getClassLinkerDefineClassLibPath(),
                                   2, "ClassLinker", "DefineClass");
}

// Hook body for the seven-argument DefineClass variant. Currently a pure
// pass-through to the saved original; returns nullptr if the original was never saved.
void *DefineClassV22(void* thiz,
                     void* self,
                     const char* descriptor,
                     size_t hash,
                     void* class_loader,
                     const void* dex_file,
                     const void* dex_class_def) {
    if (g_originDefineClassV22 == nullptr) {
        return nullptr;
    }
    // patchClass(descriptor,dex_file,dex_class_def);
    return g_originDefineClassV22(thiz, self, descriptor, hash,
                                  class_loader, dex_file, dex_class_def);
}

// Hook body for the five-argument DefineClass variant. Currently a pure
// pass-through to the saved original; returns nullptr if the original was never saved.
void *DefineClassV21(void* thiz,
                     const char* descriptor,
                     void* class_loader,
                     const void* dex_file,
                     const void* dex_class_def) {
    if (g_originDefineClassV21 == nullptr) {
        return nullptr;
    }
    // patchClass(descriptor,dex_file,dex_class_def);
    return g_originDefineClassV21(thiz, descriptor, class_loader, dex_file, dex_class_def);
}

// Resolves the DefineClass symbol in libart.so and inline-hooks it with Dobby.
// The seven-argument variant is installed when apiLevel >= __ANDROID_API_L_MR1__,
// the five-argument variant otherwise. If the symbol name or its address cannot be
// resolved, the failure is logged and no hook is installed (instead of passing a
// null pointer on to Dobby).
void hook_DefineClass(){
    const char * symbol = getClassLinkerDefineClassSymbol();
    if (symbol == nullptr) {
        __android_log_print(6,"p1umh0","hook DefineClass failed: symbol not found");
        return;
    }
    void * defineClassAddress = DobbySymbolResolver(getClassLinkerDefineClassLibPath(), symbol);
    if (defineClassAddress == nullptr) {
        __android_log_print(6,"p1umh0","hook DefineClass failed: address not resolved");
        return;
    }
    if(apiLevel >= __ANDROID_API_L_MR1__) {
        DobbyHook(defineClassAddress, (void *) DefineClassV22, (void **) &g_originDefineClassV22);
        __android_log_print(6,"p1umh0","hook DefineClassV22 done");
    } else {
        DobbyHook(defineClassAddress, (void *) DefineClassV21, (void **) &g_originDefineClassV21);
        __android_log_print(6,"p1umh0","hook DefineClassV21 done");
    }
}

// Path of the library that is searched for the LoadMethod symbol (libart.so).
const char * getClassLinkerLoadMethodLibPath() {
    const char *artPath = getArtLibPath();
    return artPath;
}

// Looks up the first string-table entry containing both "ClassLinker" and
// "LoadMethod" in the LoadMethod library; returns nullptr when none is found.
const char * getClassLinkerLoadMethodSymbol() {
    return find_symbol_in_elf_file(getClassLinkerLoadMethodLibPath(),
                                   2, "ClassLinker", "LoadMethod");
}

// Decodes the first four bytes of bytearr as a little-endian unsigned 32-bit integer
// (bytearr[0] is the least significant byte).
//
// Each byte is converted to uint8_t before being merged: where plain char is signed,
// a byte >= 0x80 would otherwise be sign-extended and overwrite the upper bits.
uint32_t bytearr2uint32(char * bytearr){
    uint32_t retnum = 0;
    for (int i = 3; i >= 0; i--) {
        retnum = (retnum << 8) | static_cast<uint8_t>(bytearr[i]);
    }
    return retnum;
}

// 解析抽取代码文件
// Parses the extracted-code file that sits in the same directory as dexpath and
// fills codemap (method sequence number -> CodeItem).
//
// File layout, repeated until the end of the file or a zero field:
//   4 bytes  method sequence number (decoded by bytearr2uint32)
//   4 bytes  code length in bytes   (decoded by bytearr2uint32)
//   N bytes  method code
//
// Parsing stops quietly at the first zero sequence number / zero length, and with a
// log message when the file cannot be opened or a record is truncated.
void parseextractedcodefile(std::string dexpath){
    size_t dirlen = dexpath.find_last_of("/");
    std::string codepath = dexpath.substr(0,dirlen+1) + extractedcodefile;
    FILE * codefile = fopen(codepath.c_str(),"rb");
    if (codefile == nullptr) {
        __android_log_print(6, "p1umh0", "open extracted code file failed => %s", codepath.c_str());
        return;
    }
    long endpos = -1;
    if (fseek(codefile,0,SEEK_END) == 0) {
        endpos = ftell(codefile);
    }
    if (endpos < 0) {
        fclose(codefile);
        return;
    }
    uint32_t filelen = (uint32_t) endpos;
    __android_log_print(6, "p1umh0", "filelen => 0x%x", filelen);
    size_t off = 0;
    // A record needs at least its 8-byte header
    while (off + 8 <= filelen) {
        // fseek takes (stream, offset, whence)
        if (fseek(codefile, (long) off, SEEK_SET) != 0) {
            break;
        }
        char header[8];
        if (fread(header,1,sizeof(header),codefile) != sizeof(header)) {
            break;
        }
        // 4-byte method sequence number
        uint32_t codenumber = bytearr2uint32(header);
        if(codenumber==0){
            break;
        }
        __android_log_print(6, "p1umh0", "codenumber => 0x%x", codenumber);
        // 4-byte code length
        uint32_t codelength = bytearr2uint32(header + 4);
        if(codelength==0){
            break;
        }
        __android_log_print(6, "p1umh0", "codelength => 0x%x", codelength);
        off += 8;
        if (codelength > filelen - off) {
            __android_log_print(6, "p1umh0", "extracted code file truncated at 0x%x", (uint32_t) off);
            break;
        }
        // Heap buffer instead of a stack array sized from file content
        std::string code(codelength, '\0');
        if (fread(&code[0],1,codelength,codefile) != codelength) {
            break;
        }
        // Keep the first record for a sequence number, as map::insert did before
        if (codemap.find(codenumber) == codemap.end()) {
            // NOTE(review): assumes CodeItem copies the buffer it is given; the
            // buffer does not outlive this loop iteration — confirm in DexFile.h
            CodeItem * codeItem = new CodeItem(codelength, &code[0]);
            codemap.insert(std::pair<uint32_t, CodeItem*>(codenumber, codeItem));
        }
        // Advance to the next record
        off += codelength;
    }
    fclose(codefile);
}

// Restores the extracted code of one method in place, before the real LoadMethod runs.
//
// Only dex files whose location contains "app_myodex" are touched. The extracted-code
// file is parsed on the first such call. The method's instructions start 16 bytes after
// code_off_; the leading 16-bit value selects where the 4-byte sequence number was
// stored (byte offset 2, 4 or 6). If that number is present in codemap, the saved code
// is copied back over the instructions.
void innerLoadMethod(void* thiz, const DexFile* dex_file, ClassAccessor::Method* method, void* klass, void* dest){
    // Only handle dex files from the source-dex extraction directory
    std::string location = dex_file->location_;
    if(location.find("app_myodex") == std::string::npos){
        return;
    }
    // Lazily parse the extracted-code file once
    if(!extractedcodefileflag){
        parseextractedcodefile(location);
        extractedcodefileflag = true;
    }
    // Methods without a code item have nothing to restore
    if(method->code_off_==0){
        return;
    }
    // Address of the first instruction
    uint8_t* codeAddr = dex_file->begin_ + method->code_off_ + 16;
    // First two bytes of the instruction stream
    uint16_t firstDvmCode = *((uint16_t*)(codeAddr));
    // Byte offset of the sequence number, depending on the leading stub value
    int numberOffset = 0;
    switch (firstDvmCode) {
        case 0x000e:
            numberOffset = 2;
            break;
        case 0x0012:
            numberOffset = 4;
            break;
        case 0x0016:
            numberOffset = 6;
            break;
        default:
            // Not one of the recognised stubs: leave the method alone
            return;
    }
    uint32_t codeNumber = *((uint32_t*)(codeAddr+numberOffset));
    if(codeNumber==0){
        return;
    }
    __android_log_print(6,"p1umh0","hooked codeNumber => 0x%x", codeNumber);
    auto itr = codemap.find(codeNumber);
    if(itr==codemap.end()){
        return;
    }
    CodeItem * codeItem = itr->second;
    __android_log_print(6,"p1umh0","hooked codelength => 0x%x", codeItem->getlength());
    // Write the original instructions back over the stub
    memcpy(codeAddr,codeItem->getcode(),codeItem->getlength());
    __android_log_print(6,"p1umh0","hooked memcpy code success");
}

// Hook body for LoadMethod: restores the method's code first, then calls the saved
// original. Does nothing at all if the original was never saved.
void LoadMethod(void* thiz, const DexFile* dex_file, ClassAccessor::Method* method, void* klass, void* dest){
    if (g_originLoadMethod == nullptr) {
        return;
    }
    // Restore first, then call through
    innerLoadMethod(thiz, dex_file, method, klass, dest);
    g_originLoadMethod(thiz, dex_file, method, klass, dest);
}

// Resolves the LoadMethod symbol in libart.so and inline-hooks it with Dobby so that
// LoadMethod() runs instead. If the symbol name or its address cannot be resolved,
// the failure is logged and no hook is installed (instead of passing a null pointer
// on to Dobby and reporting success).
void hook_LoadMethod(){
    const char * symbol = getClassLinkerLoadMethodSymbol();
    if (symbol == nullptr) {
        __android_log_print(6,"p1umh0","hook LoadMethod failed: symbol not found");
        return;
    }
    void * loadMethodAddress = DobbySymbolResolver(getClassLinkerLoadMethodLibPath(), symbol);
    if (loadMethodAddress == nullptr) {
        __android_log_print(6,"p1umh0","hook LoadMethod failed: address not resolved");
        return;
    }
    DobbyHook(loadMethodAddress, (void *) LoadMethod, (void **) &g_originLoadMethod);
    __android_log_print(6,"p1umh0","hook LoadMethod done");
}

// JNI entry point for MainActivity.stringFromJNI(): returns a fixed greeting string.
extern "C" JNIEXPORT
jstring JNICALL Java_com_p1umh0_shell3shell_MainActivity_stringFromJNI(JNIEnv *env, jobject) {
    static const char kGreeting[] = "Shell Application 3 in .so";
    return env->NewStringUTF(kGreeting);
}

加壳代码

packer,基本同一代整体壳的加壳代码,增加了对源程序dex方法代码的抽取

python 复制代码
import hashlib
import os
import pathlib
import struct
import zlib
from zipfile import ZipFile
from shell3dexparser import DexParser

# Paths
thisDir = os.path.dirname(__file__)
srcApkPath = os.path.join(thisDir, "shell3src.apk")
shellApkPath = os.path.join(thisDir, "shell3shell.apk")
newShellApkPath = os.path.join(thisDir, "shell3newshell.apk")
srcApkUnzipTempDir = os.path.join(thisDir, "srcApkUnzipTempDir")
shellApkUnzipTempDir = os.path.join(thisDir, "shellApkUnzipTempDir")

# The three archives are opened as context managers so they are closed even when a
# later step raises.
with ZipFile(srcApkPath, "r") as srcApk, \
        ZipFile(shellApkPath, "r") as shellApk, \
        ZipFile(newShellApkPath, "w") as newShellApk:

    # From the source APK, extract the res folder, resources.arsc, the lib folder
    # and every .dex file
    for srcFullName in srcApk.namelist():
        if srcFullName.startswith("res") or srcFullName.startswith("lib") or srcFullName.endswith(".dex"):
            srcApk.extract(srcFullName, srcApkUnzipTempDir)

    # From the shell APK, extract AndroidManifest.xml, classes.dex and the lib folder
    for shellFullName in shellApk.namelist():
        if shellFullName == "AndroidManifest.xml" or shellFullName == "classes.dex" or shellFullName.startswith("lib"):
            shellApk.extract(shellFullName, shellApkUnzipTempDir)

    # Add to the new shell APK: the source APK's res folder, resources.arsc and lib folder
    srcApkUnzipTempDirIns = pathlib.Path(srcApkUnzipTempDir)
    for srcResFilePath in srcApkUnzipTempDirIns.rglob(r"*"):
        if os.path.isfile(srcResFilePath) and not srcResFilePath.name.endswith(".dex"):
            newShellApk.write(
                srcResFilePath, srcResFilePath.relative_to(srcApkUnzipTempDirIns))

    # Add to the new shell APK: the shell APK's AndroidManifest.xml and lib folder
    shellApkUnzipTempDirIns = pathlib.Path(shellApkUnzipTempDir)
    for shellResFilePath in shellApkUnzipTempDirIns.rglob(r"*"):
        if os.path.isfile(shellResFilePath) and not shellResFilePath.name == "classes.dex":
            newShellApk.write(shellResFilePath, shellResFilePath.relative_to(
                shellApkUnzipTempDirIns))

    # Concatenate the shell APK's classes.dex with all .dex files of the source APK.
    # Layout:
    #   shell dex data
    #   number of source .dex files (2 bytes)
    #   per source dex: name length (2 bytes) + name + data length (4 bytes) + data
    #   length of everything after the shell dex data (4 bytes)

    # Data of the new shell APK's classes.dex; a bytearray allows in-place patching
    # without converting the whole file into a list of ints
    with open(os.path.join(shellApkUnzipTempDir, "classes.dex"), "rb") as f:
        newShellDexData = bytearray(f.read())
    # Length of the shell dex data
    shellDexDataLen = len(newShellDexData)
    # Number of source .dex files (2-byte placeholder, patched below)
    newShellDexData += b"??"
    srcDexFileNum = 0
    # Code extracted from the source .dex files
    srcDexFileDataExtractedAll = bytearray()
    # Append every source .dex file
    for srcDexFilePath in srcApkUnzipTempDirIns.rglob(r"*"):
        if os.path.isfile(srcDexFilePath) and srcDexFilePath.name.endswith(".dex"):
            srcDexFileNum += 1
            srcDexFileRelaPath = srcDexFilePath.relative_to(
                srcApkUnzipTempDirIns).name.encode()
            # Big-endian short, to match Java's DataInputStream.readShort()
            newShellDexData += struct.pack(">H", len(srcDexFileRelaPath))
            newShellDexData += srcDexFileRelaPath
            with open(srcDexFilePath, "rb") as f:
                srcDexFileData = f.read()
            # Use the dex parser to extract instructions, obtaining:
            # 1. the extracted data (srcDexFileDataExtracted)
            # 2. the patched .dex data (srcDexFileDataPatched, checksum and signature fixed)
            dexParser = DexParser(srcDexFileData)
            srcDexFileDataExtracted = dexParser.extract()
            srcDexFileDataExtractedAll += srcDexFileDataExtracted
            srcDexFileDataPatched = dexParser.patch()
            # Big-endian int, to match Java's DataInputStream.readInt()
            newShellDexData += struct.pack(">I", len(srcDexFileDataPatched))
            newShellDexData += srcDexFileDataPatched
    # Length of everything after the shell dex data, big-endian int for readInt()
    newShellDexData += struct.pack(">I", len(newShellDexData)-shellDexDataLen)

    # Patch the number of source .dex files, big-endian short for readShort()
    newShellDexData[shellDexDataLen:shellDexDataLen +
                    2] = struct.pack(">H", srcDexFileNum)

    # Fix file_size of the new classes.dex, little-endian int per the dex layout
    struct.pack_into("<I", newShellDexData, 32, len(newShellDexData))

    # Fix the signature of the new classes.dex: raw SHA-1 digest of everything from offset 32
    newSignature = hashlib.sha1(newShellDexData[32:]).digest()
    newShellDexData[12:12+len(newSignature)] = newSignature

    # Fix the checksum of the new classes.dex: little-endian adler32 of everything from offset 12
    newChecksum = zlib.adler32(newShellDexData[12:])
    struct.pack_into("<I", newShellDexData, 8, newChecksum)

    # Add to the new shell APK: the new classes.dex
    with open(os.path.join(thisDir, "classes.dex"), "wb") as f:
        f.write(newShellDexData)
    newShellApk.write(os.path.join(thisDir, "classes.dex"), "classes.dex")

    # Add to the new shell APK: assets/srcapkcode, holding the code extracted from
    # the source .dex files
    with open(os.path.join(thisDir, "srcapkcode"), "wb") as f:
        f.write(srcDexFileDataExtractedAll)
    newShellApk.write(os.path.join(thisDir, "srcapkcode"), "assets/srcapkcode")

dexparser

主要解析了字符串表、类型表、原型表、方法表和类定义表

在抽取方法代码时没有保留方法在方法表中的索引method_idx,而是保留了一个全局方法序号

因为encoded_method结构中实际上是method_idx_diff,它是一个方法表索引差值

如果想要得到某个方法真实的method_idx,就需要解析所属类的其他方法的encoded_method结构,非常麻烦

笔者这里用一个全局方法序号去标识所抽取的方法代码,在回填代码时只需找这个全局方法序号即可

全局方法序号保存在返回指令后,可以通过Verifier对Method的验证,只是要求原方法指令数量足够多

python 复制代码
import hashlib
import os
import struct
import zlib


# Directory next to this script named "srcApkUnzipTempDir"
# (presumably where the packer unpacks the source APK; verify against the packer script)
BASEPATH = os.path.join(os.path.dirname(__file__), "srcApkUnzipTempDir")
# Paths of the input dex, the extracted-code output and the patched dex under BASEPATH
DEXPATH = os.path.join(BASEPATH, "classes.dex")
DEXCODEPATH = os.path.join(BASEPATH, "classes_code")
DEXPATCHPATH = os.path.join(BASEPATH, "classes_patched.dex")
# Class descriptors whose methods are extracted; parseClassList keeps only these classes
CLASSNAMELIST = [b"Lcom/p1umh0/shell3src/MainActivity;"]


class DexClass():
    """Record for one target class: its name plus its direct and virtual method lists.

    The two method lists start as None and are filled in by the parser afterwards.
    """

    def __init__(self, class_name, class_direct_methods_size, class_virtual_methods_size) -> None:
        # Class name (descriptor)
        self.class_name = class_name
        # Direct methods: count, then the list (filled in later)
        self.class_direct_methods_size = class_direct_methods_size
        self.class_direct_methods_list = None
        # Virtual methods: count, then the list (filled in later)
        self.class_virtual_methods_size = class_virtual_methods_size
        self.class_virtual_methods_list = None


class DexMethod():
    """Record for one extracted method.

    The constructor stores the method-table index and the location/size of the
    method's instructions; the code bytes, the return stub and the sequence number
    start as None and are filled in by the parser afterwards.
    """

    def __init__(self, method_idx, code_insns_off, code_insns_size) -> None:
        # Index into the method table
        self.method_idx = method_idx
        # File-wide offset of the method's instructions
        self.code_insns_off = code_insns_off
        # Instruction length
        self.code_insns_size = code_insns_size
        # Filled in later: instruction bytes, return-instruction bytes, and the
        # 4-byte sequence number that follows the return instruction
        self.code_insns_data = None
        self.code_return_insns_data = None
        self.code_number = None


class DexParser():
    def __init__(self, srcDexFileData: bytes) -> None:
        # 全局方法序号
        self.code_number_global = 1
        # dex文件数据
        self.srcDexFileData = srcDexFileData
        # 解析字符串表
        self.string_ids_size = -1
        self.string_ids_off = -1
        self.srcDexStringList = None
        self.parseStringList()
        # 解析类型表
        self.type_ids_size = -1
        self.type_ids_off = -1
        self.srcDexTypeList = None
        self.parseTypeList()
        # 解析原型表
        self.proto_ids_size = -1
        self.proto_ids_off = -1
        self.srcDexProtoList = None
        self.parseProtoList()
        # 解析方法表
        self.method_ids_size = -1
        self.method_ids_off = -1
        self.srcDexMethodList = None
        self.parseMethodList()
        # 解析类定义表
        self.class_defs_size = -1
        self.class_defs_off = -1
        self.srcDexClassList = None
        self.parseClassList()

    def parseStringList(self) -> None:
        # 字符串数量
        self.string_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x38:0x38+4])[0]
        # 字符串表偏移
        self.string_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x3C:0x3C+4])[0]
        # 创建字符串表
        self.srcDexStringList = [b""] * self.string_ids_size
        # 填充字符串表
        for i in range(self.string_ids_size):
            # 字符串数据偏移
            string_data_off = struct.unpack(
                "<I", self.srcDexFileData[self.string_ids_off+i*4:self.string_ids_off+(i+1)*4])[0]
            # 开头1~5字节表示字符串长度,最少1字节,最多5字节
            uleb128_max = list(
                self.srcDexFileData[string_data_off:string_data_off+5])
            # uleb128所用字节数以及所表示的值
            uleb128_used, uleb128_val = self.parseUleb128(uleb128_max)
            # 字符串真实数据偏移
            string_data_real_off = string_data_off + uleb128_used
            # 字符串长度,+1还包含了\x00
            string_data_real_len = uleb128_val + 1
            # 字符串真实数据
            string_data_real_data = self.srcDexFileData[string_data_real_off:
                                                        string_data_real_off+string_data_real_len]
            # 如果按照字符串长度取出的字符串真实数据以\x00结尾,说明该字符串正常,需要保留(去除末尾的\x00)
            if list(string_data_real_data)[-1] == 0:
                self.srcDexStringList[i] = string_data_real_data[:-1]

    def parseTypeList(self) -> None:
        # 类型数量
        self.type_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x40:0x40+4])[0]
        # 类型表偏移
        self.type_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x44:0x44+4])[0]
        # 创建类型表
        self.srcDexTypeList = [-1] * self.type_ids_size
        # 填充类型表
        for i in range(self.type_ids_size):
            # 描述符索引/字符串表索引
            descriptor_idx = struct.unpack(
                "<I", self.srcDexFileData[self.type_ids_off+i*4:self.type_ids_off+(i+1)*4])[0]
            self.srcDexTypeList[i] = descriptor_idx

    def parseProtoList(self) -> None:
        # 原型数量
        self.proto_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x48:0x48+4])[0]
        # 原型表偏移
        self.proto_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x4C:0x4C+4])[0]
        # 创建原型表
        self.srcDexProtoList = [None] * self.proto_ids_size
        # 填充原型表
        for i in range(self.proto_ids_size):
            proto_id_data = self.srcDexFileData[self.proto_ids_off +
                                                i*0xC:self.proto_ids_off+(i+1)*0xC]
            shorty_idx = struct.unpack("<I", proto_id_data[0:4])[0]
            return_type_idx = struct.unpack("<I", proto_id_data[4:8])[0]
            parameters_off = struct.unpack("<I", proto_id_data[8:12])[0]
            self.srcDexProtoList[i] = (
                shorty_idx, return_type_idx, parameters_off)

    def parseMethodList(self) -> None:
        # 方法数量
        self.method_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x58:0x58+4])[0]
        # 方法表偏移
        self.method_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x5C:0x5C+4])[0]
        # 创建方法表
        self.srcDexMethodList = [None] * self.method_ids_size
        # 填充方法表
        for i in range(self.method_ids_size):
            method_id_data = self.srcDexFileData[self.method_ids_off +
                                                 i*8:self.method_ids_off+(i+1)*8]
            class_idx = struct.unpack("<H", method_id_data[0:2])[0]
            proto_idx = struct.unpack("<H", method_id_data[2:4])[0]
            name_idx = struct.unpack("<I", method_id_data[4:8])[0]
            self.srcDexMethodList[i] = (class_idx, proto_idx, name_idx)

    def parseClassList(self) -> None:
        # 类定义数量
        self.class_defs_size = struct.unpack(
            "<I", self.srcDexFileData[0x60:0x60+4])[0]
        # 类定义表偏移
        self.class_defs_off = struct.unpack(
            "<I", self.srcDexFileData[0x64:0x64+4])[0]
        # 创建类定义表
        self.srcDexClassList = []
        # 填充类定义表
        for i in range(self.class_defs_size):
            # 类定义的结构体数据
            class_def_data = self.srcDexFileData[self.class_defs_off +
                                                 i*0x20:self.class_defs_off+(i+1)*0x20]
            # 类名称索引/类型表索引
            class_idx = struct.unpack("<I", class_def_data[:4])[0]
            # 类名称
            class_name = self.srcDexStringList[self.srcDexTypeList[class_idx]]
            # 保留目标类
            if class_name in CLASSNAMELIST:
                # 类数据偏移
                class_data_off = struct.unpack(
                    "<I", class_def_data[0x18:0x18+4])[0]
                # 几个size都是uleb128类型,依次处理
                uleb128_used_all = 0
                # static_fields_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, static_fields_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # instance_fields_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, instance_fields_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # direct_methods_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, direct_methods_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # virtual_methods_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, virtual_methods_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # 创建类
                dexClass = DexClass(
                    class_name, direct_methods_size, virtual_methods_size)
                # 抽取直接方法
                class_direct_methods_list = []
                method_idx_prev = -1
                for i in range(direct_methods_size):
                    # 3个uleb128,依次处理
                    # method_idx_diff
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, method_idx_diff = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # access_flags
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, access_flags = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # code_off
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, code_off = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # 处理方法表索引method_idx
                    if method_idx_prev == -1:
                        method_idx_prev = method_idx_diff
                        method_idx = method_idx_diff
                    else:
                        method_idx = method_idx_prev + method_idx_diff
                        method_idx_prev = method_idx_prev + method_idx_diff
                    # 创建方法,只抽取ACC_PUBLIC(0x1)、ACC_PRIVATE(0x2)、ACC_PROTECTED(0x4)
                    if code_off != 0 and access_flags in [0x1, 0x2, 0x4]:
                        # 方法返回类型
                        code_return_type = self.srcDexStringList[self.srcDexTypeList[
                            self.srcDexProtoList[self.srcDexMethodList[method_idx][1]][1]]]
                        # 根据方法返回类型获取方法返回指令字节码
                        code_return_bytes = self.getCodeReturnBytes(
                            code_return_type)
                        # 方法代码长度
                        code_insns_size = struct.unpack(
                            "<I", self.srcDexFileData[code_off+12:code_off+12+4])[0]
                        # +4是为了存储方法序号
                        if len(code_return_bytes) + 4 <= code_insns_size * 2:
                            # 方法代码全局偏移
                            code_insns_off = code_off + 12 + 4
                            # 方法实例
                            dexMethod = DexMethod(
                                method_idx, code_insns_off, code_insns_size)
                            # 方法代码
                            code_insns_data = self.srcDexFileData[code_insns_off:code_insns_off+code_insns_size*2]
                            dexMethod.code_insns_data = code_insns_data
                            # 方法返回代码
                            dexMethod.code_return_insns_data = list(
                                code_return_bytes)
                            # 方法序号,直接保存为4字节
                            dexMethod.code_number = struct.pack(
                                "<I", self.code_number_global)
                            self.code_number_global += 1
                            class_direct_methods_list.append(dexMethod)
                dexClass.class_direct_methods_list = class_direct_methods_list
                # 抽取虚方法
                class_virtual_methods_list = []
                method_idx_prev = -1
                for i in range(virtual_methods_size):
                    # 3个uleb128,依次处理
                    # method_idx_diff
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, method_idx_diff = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # access_flags
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, access_flags = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # code_off
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, code_off = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # 处理方法表索引method_idx
                    if method_idx_prev == -1:
                        method_idx_prev = method_idx_diff
                        method_idx = method_idx_diff
                    else:
                        method_idx = method_idx_prev + method_idx_diff
                        method_idx_prev = method_idx_prev + method_idx_diff
                    # 创建方法,只抽取ACC_PUBLIC(0x1)、ACC_PRIVATE(0x2)、ACC_PROTECTED(0x4)
                    if code_off != 0 and access_flags in [0x1, 0x2, 0x4]:
                        # 方法返回类型
                        code_return_type = self.srcDexStringList[self.srcDexTypeList[
                            self.srcDexProtoList[self.srcDexMethodList[method_idx][1]][1]]]
                        # 根据方法返回类型获取方法返回指令字节码
                        code_return_bytes = self.getCodeReturnBytes(
                            code_return_type)
                        # 方法代码长度
                        code_insns_size = struct.unpack(
                            "<I", self.srcDexFileData[code_off+12:code_off+12+4])[0]
                        # +4是为了存储方法序号
                        if len(code_return_bytes) + 4 <= code_insns_size * 2:
                            # 方法代码全局偏移
                            code_insns_off = code_off + 12 + 4
                            # 方法实例
                            dexMethod = DexMethod(
                                method_idx, code_insns_off, code_insns_size)
                            # 方法代码
                            code_insns_data = self.srcDexFileData[code_insns_off:code_insns_off+code_insns_size*2]
                            dexMethod.code_insns_data = code_insns_data
                            # 方法返回代码
                            dexMethod.code_return_insns_data = list(
                                code_return_bytes)
                            # 方法序号,直接保存为4字节
                            dexMethod.code_number = struct.pack(
                                "<I", self.code_number_global)
                            self.code_number_global += 1
                            class_virtual_methods_list.append(dexMethod)
                dexClass.class_virtual_methods_list = class_virtual_methods_list
                # 添加类
                self.srcDexClassList.append(dexClass)

    def parseUleb128(self, uleb128_max: list):
        """Decode one unsigned LEB128 value from the start of a byte list.

        Args:
            uleb128_max: Byte values (ints in 0..255) beginning at the encoded
                number. Callers pass a window of up to five bytes, the longest
                encoding handled here; bytes after the terminator are ignored.

        Returns:
            A ``(bytes_consumed, value)`` tuple.

        Raises:
            IndexError: If the list ends before a terminating byte is reached.
        """
        # Number of bytes consumed so far.
        uleb128_used = 0
        # Decoded value accumulated so far.
        uleb128_val = 0
        while True:
            # At least one byte is always read.
            value = uleb128_max[uleb128_used]
            # Every byte carries 7 payload bits, least-significant group first.
            uleb128_val |= (value & 0x7F) << (7 * uleb128_used)
            uleb128_used += 1
            # A clear high bit (0x80) marks the final byte, so 0x7F itself is a
            # terminator. An encoding never spans more than five bytes.
            if (value & 0x80) == 0 or uleb128_used == 5:
                break
        return uleb128_used, uleb128_val

    def getCodeReturnBytes(self, code_return_type: bytes) -> bytes:
        """Choose the stub bytecode that stands in for an extracted method body.

        Args:
            code_return_type: Return-type descriptor of the method, as bytes
                (for example ``b"V"``, ``b"I"``, ``b"J"``).

        Returns:
            The raw bytes of a minimal "return a default value" sequence that
            matches the descriptor. Any descriptor that is not void, a
            32-bit primitive, or a 64-bit primitive gets the object-return stub.
        """
        # void -> return-void
        if code_return_type in (b"V",):
            return b"\x0e\x00"
        # 32-bit primitives -> load 0 into v0, then return it
        if code_return_type in (b"B", b"C", b"F", b"I", b"S", b"Z"):
            return b"\x12\x00\x0f\x00"
        # 64-bit primitives -> load wide 0 into v0, then return-wide
        if code_return_type in (b"D", b"J"):
            return b"\x16\x00\x00\x00\x10\x00"
        # Everything else is treated as a reference type -> return-object of 0 (null).
        return b"\x12\x00\x11\x00"

    def extract(self) -> bytes:
        """Serialize the code of every extracted method into a single blob.

        One record is emitted per method, walking ``srcDexClassList`` in order
        and, within a class, its direct methods before its virtual methods:

            code_number      4 bytes, taken as already packed on the method
            byte length      4 bytes little-endian, ``code_insns_size * 2``
            code_insns_data  the method's original instruction bytes

        Returns:
            The concatenated records (``b""`` when there is nothing to emit).
        """
        def selected_methods():
            # Yield methods class by class, direct ones before virtual ones.
            # A method list is only touched when the class reports a non-zero size.
            for dex_class in self.srcDexClassList:
                if not isinstance(dex_class, DexClass):
                    continue
                if dex_class.class_direct_methods_size > 0:
                    for candidate in dex_class.class_direct_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate
                if dex_class.class_virtual_methods_size > 0:
                    for candidate in dex_class.class_virtual_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate

        records = []
        for method in selected_methods():
            records.append(method.code_number)
            # The length is stored in bytes (code units * 2) so that the reader
            # does not have to multiply again.
            records.append(struct.pack("<I", method.code_insns_size * 2))
            records.append(method.code_insns_data)
        return b"".join(records)

    def patch(self) -> bytes:
        """Return a copy of the DEX with extracted method bodies stubbed out.

        For every extracted method (direct methods before virtual methods,
        classes in ``srcDexClassList`` order) the instruction area is:

            1. filled with zero bytes (nop),
            2. overwritten at its start with the method's return stub,
            3. followed immediately by the method's 4-byte ``code_number``.

        The header is then re-sealed: the SHA-1 of everything from offset 32
        is written at offset 12, and afterwards the little-endian Adler-32 of
        everything from offset 12 is written at offset 8.

        Returns:
            The patched file contents; ``self.srcDexFileData`` is not modified.
        """
        def selected_methods():
            # Yield methods class by class, direct ones before virtual ones.
            # A method list is only touched when the class reports a non-zero size.
            for dex_class in self.srcDexClassList:
                if not isinstance(dex_class, DexClass):
                    continue
                if dex_class.class_direct_methods_size > 0:
                    for candidate in dex_class.class_direct_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate
                if dex_class.class_virtual_methods_size > 0:
                    for candidate in dex_class.class_virtual_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate

        image = bytearray(self.srcDexFileData)
        for method in selected_methods():
            begin = method.code_insns_off
            byte_len = method.code_insns_size * 2
            stub = method.code_return_insns_data
            # Wipe the whole body to nop first.
            image[begin:begin + byte_len] = b"\x00" * byte_len
            # Put the return stub at the very start of the body.
            image[begin:begin + len(stub)] = stub
            # Store the method number right behind the stub.
            number_at = begin + len(stub)
            image[number_at:number_at + 4] = method.code_number

        # Signature: raw SHA-1 digest, no byte order involved.
        digest = hashlib.sha1(bytes(image[32:])).digest()
        image[12:12 + len(digest)] = digest
        # Checksum: little-endian 32-bit value, computed after the new
        # signature is in place because it covers the signature bytes.
        adler = struct.pack("<I", zlib.adler32(bytes(image[12:])))
        image[8:8 + len(adler)] = adler
        return bytes(image)

    def extract_old(self) -> bytes:
        """Serialize extracted method code using the older record layout.

        One record is emitted per method, walking ``srcDexClassList`` in order
        and, within a class, its direct methods before its virtual methods:

            method_idx       4 bytes little-endian
            code_insns_off   4 bytes little-endian
            code_insns_size  4 bytes little-endian (as stored, not doubled)
            code_insns_data  the method's original instruction bytes

        Returns:
            The concatenated records (``b""`` when there is nothing to emit).
        """
        def selected_methods():
            # Yield methods class by class, direct ones before virtual ones.
            # A method list is only touched when the class reports a non-zero size.
            for dex_class in self.srcDexClassList:
                if not isinstance(dex_class, DexClass):
                    continue
                if dex_class.class_direct_methods_size > 0:
                    for candidate in dex_class.class_direct_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate
                if dex_class.class_virtual_methods_size > 0:
                    for candidate in dex_class.class_virtual_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate

        records = []
        for method in selected_methods():
            # Three fixed-width header fields, then the raw code.
            records.append(struct.pack("<I", method.method_idx))
            records.append(struct.pack("<I", method.code_insns_off))
            records.append(struct.pack("<I", method.code_insns_size))
            records.append(method.code_insns_data)
        return b"".join(records)

    def patch_old(self) -> bytes:
        """Return a copy of the DEX with extracted bodies stubbed (older scheme).

        For every extracted method (direct methods before virtual methods,
        classes in ``srcDexClassList`` order) the instruction area is filled
        with zero bytes (nop) and then overwritten at its start with the
        method's return stub. No method number is embedded in this variant.

        The header is then re-sealed: the SHA-1 of everything from offset 32
        is written at offset 12, and afterwards the little-endian Adler-32 of
        everything from offset 12 is written at offset 8.

        Returns:
            The patched file contents; ``self.srcDexFileData`` is not modified.
        """
        def selected_methods():
            # Yield methods class by class, direct ones before virtual ones.
            # A method list is only touched when the class reports a non-zero size.
            for dex_class in self.srcDexClassList:
                if not isinstance(dex_class, DexClass):
                    continue
                if dex_class.class_direct_methods_size > 0:
                    for candidate in dex_class.class_direct_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate
                if dex_class.class_virtual_methods_size > 0:
                    for candidate in dex_class.class_virtual_methods_list:
                        if isinstance(candidate, DexMethod):
                            yield candidate

        image = bytearray(self.srcDexFileData)
        for method in selected_methods():
            begin = method.code_insns_off
            byte_len = method.code_insns_size * 2
            stub = method.code_return_insns_data
            # Wipe the whole body to nop first.
            image[begin:begin + byte_len] = b"\x00" * byte_len
            # Put the return stub at the very start of the body.
            image[begin:begin + len(stub)] = stub

        # Signature: raw SHA-1 digest, no byte order involved.
        digest = hashlib.sha1(bytes(image[32:])).digest()
        image[12:12 + len(digest)] = digest
        # Checksum: little-endian 32-bit value, computed after the new
        # signature is in place because it covers the signature bytes.
        adler = struct.pack("<I", zlib.adler32(bytes(image[12:])))
        image[8:8 + len(adler)] = adler
        return bytes(image)


# with open(DEXPATH, "rb") as f:
#     srcDexFileData = f.read()
# dexParser = DexParser(srcDexFileData)

# with open(DEXCODEPATH, "wb") as f:
#     f.write(dexParser.extract())

# with open(DEXPATCHPATH, "wb") as f:
#     f.write(dexParser.patch())

尾声

笔者用了一个全局方法序号去标识所抽取的方法代码,从而避免在so层对dex文件的重新解析(其实就是偷懒了)

这里猜测dpt-shell项目的作者前移hook点是为了适配多种安卓版本

笔者这里没有考虑版本兼容性的问题,只是在安卓13系统上的简单实践,欢迎师傅们讨论~

相关推荐
秋4271 小时前
防火墙基本介绍与使用
linux·网络协议·安全·网络安全·架构·系统安全
sunnyday04262 小时前
Spring Boot 项目中使用 Dynamic Datasource 实现多数据源管理
android·spring boot·后端
幽络源小助理3 小时前
下载安装AndroidStudio配置Gradle运行第一个kotlin程序
android·开发语言·kotlin
inBuilder低代码平台3 小时前
浅谈安卓Webview从初级到高级应用
android·java·webview
豌豆学姐3 小时前
Sora2 短剧视频创作中如何保持人物一致性?角色创建接口教程
android·java·aigc·php·音视频·uniapp
白熊小北极3 小时前
Android Jetpack Compose折叠屏感知与适配
android
HelloBan3 小时前
setHintTextColor不生效
android
Bug.ink5 小时前
BUUCTF——WEB(4)
前端·网络安全·靶场·ctf·buuctf
洞窝技术6 小时前
从0到30+:智能家居配网协议融合的实战与思考
android
QING6186 小时前
SupervisorJob子协程异常处理机制 —— 新手指南
android·kotlin·android jetpack