Android二代抽取壳简易实现和踩坑记录

Android二代抽取壳简易实现和踩坑记录

参考资料

[1] dpt-shell
[2] Android函数抽取壳的实现
[3] dpt-shell抽取壳项目源码及其逆向分析

整体思路

在《Android一代整体壳简易实现和踩坑记录》的基础上,做了以下改动:

1、在向壳程序dex末尾追加源程序所有dex时,抽取方法代码到资源文件

2、在壳程序Application的attachBaseContext方法解压出Apk所有so并通过System.load()加载壳so

3、壳so的初始函数_init实现对execve、mmap以及LoadMethod的hook

源程序

源程序基本同一代整体壳中的源程序,没有特别需要说明的地方

壳程序

ShellApplication,相较于一代整体壳中的代码,增加了两个方法,而且结构非常像,只是解压出的目录不同

java 复制代码
/**
 * Extracts every arm64-v8a native library (entries named lib/.../*.so that
 * contain "arm64-v8a") from the given APK into the directory named by
 * {@code privatelibspath}.
 *
 * A library whose target file already exists is skipped, because
 * {@code createNewFile()} returns false in that case.
 *
 * @param apkabspath absolute path of the APK (zip) file to read
 * @throws RuntimeException wrapping any IOException raised while reading or writing
 */
private void extractsofiles(String apkabspath){
    // try-with-resources closes the zip stream exactly once, even when an exception is thrown
    try (ZipInputStream zipInputStream = new ZipInputStream(new BufferedInputStream(new FileInputStream(apkabspath)))) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryname = entry.getName();
            if (entryname.startsWith("lib/") && entryname.endsWith(".so") && entryname.contains("arm64-v8a")) {
                // Keep only the file name; lastIndexOf() + 1 is 0 when there is no '/', so substring never throws
                String filename = entryname.substring(entryname.lastIndexOf('/') + 1);
                File libfile = new File(privatelibspath, filename);
                if (libfile.createNewFile()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(libfile)) {
                        byte[] bytes = new byte[1024];
                        int length;
                        while ((length = zipInputStream.read(bytes)) != -1) {
                            // Write only the bytes actually read; writing the whole buffer
                            // appends stale data after every partial read and corrupts the file
                            fileOutputStream.write(bytes, 0, length);
                        }
                    }
                }
            }
            zipInputStream.closeEntry();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}

/**
 * Extracts the extracted-method-code asset (an entry whose name starts with
 * "assets" and contains {@code extractedcodefile}) from the given APK into
 * the directory named by {@code privateodexpath}.
 *
 * If the target file already exists it is left untouched, because
 * {@code createNewFile()} returns false in that case.
 *
 * @param apkabspath absolute path of the APK (zip) file to read
 * @throws RuntimeException wrapping any IOException raised while reading or writing
 */
private void extractcodefile(String apkabspath) {
    // try-with-resources closes the zip stream exactly once, even when an exception is thrown
    try (ZipInputStream zipInputStream = new ZipInputStream(new BufferedInputStream(new FileInputStream(apkabspath)))) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryname = entry.getName();
            if (entryname.startsWith("assets") && entryname.contains(extractedcodefile)) {
                // Keep only the file name; lastIndexOf() + 1 is 0 when there is no '/', so substring never throws
                String filename = entryname.substring(entryname.lastIndexOf('/') + 1);
                File codefile = new File(privateodexpath, filename);
                if (codefile.createNewFile()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(codefile)) {
                        byte[] bytes = new byte[1024];
                        int length;
                        while ((length = zipInputStream.read(bytes)) != -1) {
                            // Write only the bytes actually read; writing the whole buffer
                            // appends stale data after every partial read and corrupts the file
                            fileOutputStream.write(bytes, 0, length);
                        }
                    }
                }
            }
            zipInputStream.closeEntry();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}

壳so

hook_DefineClass最初参考的是[1]中较新的代码,其思路是把hook点前移到DefineClass

后来发现之前[2]中hook LoadMethod的思路更加简单,于是有了hook_LoadMethod

bhook的使用方法参考bhook项目的官方文档,其中前两步是必做的

Dobby的使用参考CMakeLists.txt,静态导入(原Dobby项目编译不出来,说是少个头文件)

cpp 复制代码
#include <jni.h>
#include <string>
#include "android/log.h"
#include "sys/mman.h"
#include "bytehook.h"
#include <unistd.h>
#include "Dobby/include/dobby.h"
#include <elf.h>
#include <dlfcn.h>
#include "DexFile.h"
#include <map>
#include <vector>
#include <fstream>
#include <stdlib.h>
#include <cerrno>
#include <cstdarg>
#include <cstdio>
#include <cstring>

// Device API level, set in _init(); used to pick version-specific library names and paths
int apiLevel;
// Forward declarations of the hook installers defined further down
void hook();
void hook_execve();
void hook_mmap();
void hook_DefineClass();
void hook_LoadMethod();

// Name of the extracted-code file; it is looked up in the same directory as the source .dex
std::string extractedcodefile = "srcapkcode";
// Set once the extracted-code file has been parsed, so that it is parsed only once
bool extractedcodefileflag = false;
// Extracted code lookup table: method sequence number -> extracted code object
std::map<uint32_t , CodeItem*> codemap;

// Receives the original DefineClass entry point when hook_DefineClass() installs
// the DefineClassV22 replacement (seven-argument signature)
static void* (*g_originDefineClassV22)(void* thiz,
                                       void* self,
                                       const char* descriptor,
                                       size_t hash,
                                       void* class_loader,
                                       const void* dex_file,
                                       const void* dex_class_def);

// Receives the original DefineClass entry point when hook_DefineClass() installs
// the DefineClassV21 replacement (five-argument signature)
static void* (*g_originDefineClassV21)(void* thiz,
                                       const char* descriptor,
                                       void* class_loader,
                                       const void* dex_file,
                                       const void* dex_class_def);

// Receives the original LoadMethod entry point when hook_LoadMethod() installs
// the LoadMethod replacement
static void (*g_originLoadMethod)(void* thiz,
                                  const DexFile* dex_file,
                                  ClassAccessor::Method* method,
                                  void* klass,
                                  void* dst);

// Library initializer: records the device API level, then installs all hooks.
extern "C" void _init()
{
    // The API level must be known first; the hook helpers select library names from it
    apiLevel = android_get_device_api_level();
    hook();
}

// Initializes bytehook and installs the execve, mmap and LoadMethod hooks, in that order.
// hook_DefineClass() is deliberately left disabled; hook_LoadMethod() is used instead.
void hook()
{
    bytehook_init(BYTEHOOK_MODE_AUTOMATIC, false);

    hook_execve();
    hook_mmap();
    hook_LoadMethod();
}

// Returns the name of the library used as the caller when hooking libc functions:
// "libartbase.so" for API level 29 and above, "libart.so" otherwise.
const char * getArtLibName() {
    return (apiLevel >= 29) ? "libartbase.so" : "libart.so";
}

// Replacement for execve: refuses to launch any program whose path contains
// "dex2oat" (fails with EACCES) and forwards every other call to the previous handler.
int fake_execve(const char *pathname, char *const argv[], char *const envp[]) {
    BYTEHOOK_STACK_SCOPE();
    const bool is_dex2oat = strstr(pathname, "dex2oat") != nullptr;
    if (!is_dex2oat) {
        return BYTEHOOK_CALL_PREV(fake_execve, pathname, argv, envp);
    }
    // Block dex2oat
    errno = EACCES;
    return -1;
}

void hook_execve(){
    bytehook_stub_t stub = bytehook_hook_single(
            getArtLibName(),
            "libc.so",
            "execve",
            (void *) fake_execve,
            nullptr,
            nullptr);
    if (stub != nullptr) {
        __android_log_print(6,"p1umh0","hook execve done");
    }
}

// Replacement for mmap: when a mapping is requested readable but not writable,
// PROT_WRITE is added before forwarding the call to the previous handler.
void* fake_mmap(void * __addr, size_t __size, int __prot, int __flags, int __fd, off_t __offset){
    BYTEHOOK_STACK_SCOPE();
    const bool readable = (__prot & PROT_READ) != 0;
    const bool writable = (__prot & PROT_WRITE) != 0;
    // Add write permission to read-only mappings
    const int effective_prot = (readable && !writable) ? (__prot | PROT_WRITE) : __prot;
    return BYTEHOOK_CALL_PREV(fake_mmap, __addr, __size, effective_prot, __flags, __fd, __offset);
}

void hook_mmap(){
    bytehook_stub_t stub = bytehook_hook_single(
            getArtLibName(),
            "libc.so",
            "mmap",
            (void *) fake_mmap,
            nullptr,
            nullptr);
    if(stub != nullptr){
        __android_log_print(6,"p1umh0","hook mmap done");
    }
}

// Returns the 64-bit libart.so path that matches the current API level:
// the /system path below 29, the com.android.runtime apex at 29,
// and the com.android.art apex above 29.
const char * getArtLibPath() {
    if (apiLevel > 29) {
        return "/apex/com.android.art/lib64/libart.so";
    }
    if (apiLevel == 29) {
        return "/apex/com.android.runtime/lib64/libart.so";
    }
    return "/system/lib64/libart.so";
}

// Returns the 64-bit libartbase.so path: the com.android.runtime apex at
// API level 29, the com.android.art apex for every other level.
const char * getArtBaseLibPath() {
    return (apiLevel == 29)
           ? "/apex/com.android.runtime/lib64/libartbase.so"
           : "/apex/com.android.art/lib64/libartbase.so";
}

/**
 * Searches the first string-table section (SHT_STRTAB) of a 64-bit ELF file
 * for the first entry that contains every given keyword as a substring.
 *
 * @param elf_file      path of the ELF file to scan
 * @param keyword_count number of keywords that follow
 * @param ...           keyword_count arguments of type const char*
 * @return a heap-allocated copy of the matching entry, or nullptr when the
 *         file cannot be read, is malformed, or nothing matches. The copy is
 *         never freed by this function; callers keep it for the lifetime of
 *         the process, so only the short symbol name stays allocated instead
 *         of the whole file image.
 */
const char* find_symbol_in_elf_file(const char *elf_file,int keyword_count,...) {
    FILE *elf_fp = fopen(elf_file, "rb");
    if (elf_fp == nullptr) {
        return nullptr;
    }
    // Determine the file size
    long end_pos = -1;
    if (fseek(elf_fp, 0L, SEEK_END) == 0) {
        end_pos = ftell(elf_fp);
    }
    if (end_pos < (long) sizeof(Elf64_Ehdr) || fseek(elf_fp, 0L, SEEK_SET) != 0) {
        fclose(elf_fp);
        return nullptr;
    }
    const size_t lib_size = (size_t) end_pos;
    // Read the whole file; the extra zero byte guarantees that strstr() always
    // meets a terminator inside the buffer
    char *data = (char *) calloc(lib_size + 1, 1);
    if (data == nullptr) {
        fclose(elf_fp);
        return nullptr;
    }
    const size_t read_size = fread(data, 1, lib_size, elf_fp);
    // The file handle is no longer needed once the image is in memory
    fclose(elf_fp);
    if (read_size != lib_size) {
        free(data);
        return nullptr;
    }
    // ELF header
    const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *) data;
    // Reject a section header table that does not fit inside the file
    if (ehdr->e_shoff > lib_size ||
        (lib_size - ehdr->e_shoff) / sizeof(Elf64_Shdr) < ehdr->e_shnum) {
        free(data);
        return nullptr;
    }
    // Section headers
    const Elf64_Shdr *shdr = (const Elf64_Shdr *) (data + ehdr->e_shoff);
    const char *found = nullptr;
    for (int i = 0; i < ehdr->e_shnum; i++, shdr++) {
        if (shdr->sh_type != SHT_STRTAB) {
            continue;
        }
        // Only the first string table is examined
        if (shdr->sh_offset <= lib_size && shdr->sh_size <= lib_size - shdr->sh_offset) {
            const char *ptr = data + shdr->sh_offset;
            const char *str_end = ptr + shdr->sh_size;
            // Walk the NUL-separated entries of the string table
            while (ptr < str_end && found == nullptr) {
                const char *item_value = ptr;
                const size_t item_len = strnlen(item_value, (size_t) (str_end - item_value));
                ptr += (item_len + 1);
                if (item_len == 0) {
                    continue;
                }
                int match_count = 0;
                va_list kw_list;
                va_start(kw_list, keyword_count);
                for (int n = 0; n < keyword_count; n++) {
                    const char *keyword = va_arg(kw_list, const char*);
                    if (strstr(item_value, keyword)) {
                        match_count++;
                    }
                }
                va_end(kw_list);
                if (match_count == keyword_count) {
                    found = item_value;
                }
            }
        }
        break;
    }
    // Copy the match out so that the file image can be released on every path
    char *result = (found != nullptr) ? strdup(found) : nullptr;
    free(data);
    return result;
}

// Path of the library that is searched for the DefineClass symbol; this is
// the libart.so path chosen by getArtLibPath().
const char * getClassLinkerDefineClassLibPath()
{
    const char * const lib_path = getArtLibPath();
    return lib_path;
}

// Looks up, in the library returned by getClassLinkerDefineClassLibPath(), the
// first string-table entry containing both "ClassLinker" and "DefineClass".
// Returns nullptr when no such entry is found.
const char * getClassLinkerDefineClassSymbol() {
    return find_symbol_in_elf_file(getClassLinkerDefineClassLibPath(),
                                   2,
                                   "ClassLinker",
                                   "DefineClass");
}

// Replacement for the seven-argument DefineClass: forwards every argument to
// the saved original, or returns nullptr when no original has been recorded.
void *DefineClassV22(void* thiz,
                     void* self,
                     const char* descriptor,
                     size_t hash,
                     void* class_loader,
                     const void* dex_file,
                     const void* dex_class_def) {
    if (g_originDefineClassV22 == nullptr) {
        return nullptr;
    }
    // patchClass(descriptor,dex_file,dex_class_def);
    return g_originDefineClassV22(thiz, self, descriptor, hash, class_loader, dex_file, dex_class_def);
}

// Replacement for the five-argument DefineClass: forwards every argument to
// the saved original, or returns nullptr when no original has been recorded.
void *DefineClassV21(void* thiz,
                     const char* descriptor,
                     void* class_loader,
                     const void* dex_file,
                     const void* dex_class_def) {
    if (g_originDefineClassV21 == nullptr) {
        return nullptr;
    }
    // patchClass(descriptor,dex_file,dex_class_def);
    return g_originDefineClassV21(thiz, descriptor, class_loader, dex_file, dex_class_def);
}

// Resolves the DefineClass symbol in libart.so and hooks it with the
// replacement whose signature matches the API level (seven arguments from
// __ANDROID_API_L_MR1__ on, five arguments below). Logs and returns without
// hooking when the symbol name or its address cannot be resolved, instead of
// handing a null pointer to Dobby.
void hook_DefineClass(){
    const char * symbol = getClassLinkerDefineClassSymbol();
    if (symbol == nullptr) {
        __android_log_print(6,"p1umh0","hook DefineClass failed: symbol not found");
        return;
    }
    void * defineClassAddress = DobbySymbolResolver(getClassLinkerDefineClassLibPath(), symbol);
    if (defineClassAddress == nullptr) {
        __android_log_print(6,"p1umh0","hook DefineClass failed: address not resolved");
        return;
    }
    if(apiLevel >= __ANDROID_API_L_MR1__) {
        DobbyHook(defineClassAddress, (void *) DefineClassV22, (void **) &g_originDefineClassV22);
        __android_log_print(6,"p1umh0","hook DefineClassV22 done");
    } else {
        DobbyHook(defineClassAddress, (void *) DefineClassV21, (void **) &g_originDefineClassV21);
        __android_log_print(6,"p1umh0","hook DefineClassV21 done");
    }
}

// Path of the library that is searched for the LoadMethod symbol; this is
// the libart.so path chosen by getArtLibPath().
const char * getClassLinkerLoadMethodLibPath()
{
    const char * const lib_path = getArtLibPath();
    return lib_path;
}

// Looks up, in the library returned by getClassLinkerLoadMethodLibPath(), the
// first string-table entry containing both "ClassLinker" and "LoadMethod".
// Returns nullptr when no such entry is found.
const char * getClassLinkerLoadMethodSymbol() {
    return find_symbol_in_elf_file(getClassLinkerLoadMethodLibPath(),
                                   2,
                                   "ClassLinker",
                                   "LoadMethod");
}

// Decodes the first four bytes of bytearr as a little-endian unsigned 32-bit
// integer (bytearr[0] is the least significant byte).
uint32_t bytearr2uint32(char * bytearr){
    uint32_t retnum = 0;
    for (int i = 3; i >= 0; i--) {
        // Convert through uint8_t first: where plain char is signed, a byte
        // >= 0x80 would otherwise sign-extend and set all the higher bits
        retnum = (retnum << 8) | static_cast<uint8_t>(bytearr[i]);
    }
    return retnum;
}

// Parses the extracted-code file that sits in the same directory as dexpath
// and fills codemap with one CodeItem per record.
//
// Record layout, as read below: 4-byte method sequence number, 4-byte code
// length (both decoded by bytearr2uint32), then that many bytes of code.
// Parsing stops at the first zero number, zero length, short read, or at a
// record whose length runs past the end of the file.
//
// dexpath: location of the source .dex; only its directory part is used.
void parseextractedcodefile(std::string dexpath){
    const size_t dirlen = dexpath.find_last_of("/");
    const std::string codepath = dexpath.substr(0,dirlen+1) + extractedcodefile;
    FILE * codefile = fopen(codepath.c_str(),"rb");
    if (codefile == nullptr) {
        // Nothing to restore from; the original dereferenced the null FILE* here
        __android_log_print(6, "p1umh0", "open %s failed", codepath.c_str());
        return;
    }
    fseek(codefile,0,SEEK_END);
    const long endpos = ftell(codefile);
    const uint32_t filelen = endpos > 0 ? (uint32_t) endpos : 0;
    __android_log_print(6, "p1umh0", "filelen => 0x%x", filelen);
    fseek(codefile,0,SEEK_SET);
    // The file is read strictly sequentially, so the stream position always
    // equals off. The per-field fseek calls were dropped: they passed off as
    // the whence argument, which only worked by accident.
    size_t off = 0;
    while(off<filelen) {
        // 4-byte method sequence number
        char codenumberstr[4];
        if (fread(codenumberstr,1,4,codefile) != 4) {
            break;
        }
        const uint32_t codenumber = bytearr2uint32(codenumberstr);
        if(codenumber==0){
            break;
        }
        __android_log_print(6, "p1umh0", "codenumber => 0x%x", codenumber);
        off += 4;
        // 4-byte code length
        char codelengthstr[4];
        if (fread(codelengthstr,1,4,codefile) != 4) {
            break;
        }
        const uint32_t codelength = bytearr2uint32(codelengthstr);
        if(codelength==0){
            break;
        }
        __android_log_print(6, "p1umh0", "codelength => 0x%x", codelength);
        off += 4;
        // A length that runs past the end of the file means the file is corrupt
        if (codelength > filelen - off) {
            __android_log_print(6, "p1umh0", "truncated record for codenumber 0x%x", codenumber);
            break;
        }
        // codelength bytes of method code; a heap buffer replaces the
        // non-standard variable-length array, which could overflow the stack
        std::vector<char> code(codelength);
        if (fread(code.data(),1,codelength,codefile) != codelength) {
            break;
        }
        // NOTE(review): CodeItem is declared in DexFile.h, which is not visible
        // here. The original passed a stack buffer, so it presumably copies
        // the bytes; confirm against DexFile.h.
        CodeItem * codeItem = new CodeItem(codelength,code.data());
        // Register the record under its sequence number
        codemap.insert(std::pair<int, CodeItem*>(codenumber, codeItem));
        // Advance to the next record
        off += codelength;
    }
    fclose(codefile);
}

// Restores the extracted instructions of one method before ART loads it.
//
// Only dex files whose location contains "app_myodex" are handled. The
// extracted-code file is parsed on the first such call. A patched method is
// recognised by its first 16-bit code unit (0x000e, 0x0012 or 0x0016) with
// the 4-byte sequence number stored 2, 4 or 6 bytes into the instructions;
// the matching bytes from codemap are copied back over the instructions.
//
// thiz, klass and dest are not used here.
void innerLoadMethod(void* thiz, const DexFile* dex_file, ClassAccessor::Method* method, void* klass, void* dest){
    // Only the directory that holds the unpacked source .dex is of interest;
    // bind by reference so the location is not copied for every method
    const std::string& location = dex_file->location_;
    if(location.find("app_myodex") == std::string::npos){
        return;
    }
    if(!extractedcodefileflag){
        parseextractedcodefile(location);
        extractedcodefileflag = true;
    }
    // The method has no code item, so there is nothing to restore
    if(method->code_off_==0){
        return;
    }
    // Address of the instructions: 16 bytes past the start of the code item
    uint8_t* codeAddr = dex_file->begin_ + method->code_off_ + 16;
    // First 16-bit code unit; memcpy avoids a type-punned load
    uint16_t firstDvmCode = 0;
    memcpy(&firstDvmCode, codeAddr, sizeof(firstDvmCode));
    // Where the sequence number sits depends on the first code unit
    size_t numberOffset = 0;
    if(firstDvmCode == 0x000e){
        numberOffset = 2;
    }else if(firstDvmCode==0x0012){
        numberOffset = 4;
    }else if(firstDvmCode==0x0016){
        numberOffset = 6;
    }else{
        return;
    }
    // 4-byte sequence number. It is only 2-byte aligned at offsets 2 and 6,
    // so it is read with memcpy rather than through a uint32_t pointer
    uint32_t codeNumber = 0;
    memcpy(&codeNumber, codeAddr + numberOffset, sizeof(codeNumber));
    if(codeNumber==0){
        return;
    }
    __android_log_print(6,"p1umh0","hooked codeNumber => 0x%x", codeNumber);
    std::map<uint32_t , CodeItem*>::iterator itr = codemap.find(codeNumber);
    if(itr==codemap.end()){
        return;
    }
    CodeItem * codeItem = itr->second;
    __android_log_print(6,"p1umh0","hooked codelength => 0x%x", codeItem->getlength());
    // Writes into the mapped dex; presumably relies on fake_mmap having added
    // PROT_WRITE to the mapping (verify on the target device)
    memcpy(codeAddr,codeItem->getcode(),codeItem->getlength());
    __android_log_print(6,"p1umh0","hooked memcpy code success");
}

// Replacement for LoadMethod: restores the method's code first, then calls the
// saved original. Does nothing when no original has been recorded.
void LoadMethod(void* thiz, const DexFile* dex_file, ClassAccessor::Method* method, void* klass, void* dest){
    if (g_originLoadMethod == nullptr) {
        return;
    }
    // Restore first, then call through
    innerLoadMethod(thiz, dex_file, method, klass, dest);
    g_originLoadMethod(thiz, dex_file, method, klass, dest);
}

// Resolves the LoadMethod symbol in libart.so and hooks it with LoadMethod(),
// saving the original entry point in g_originLoadMethod. Logs and returns
// without hooking when the symbol name or its address cannot be resolved,
// instead of handing a null pointer to Dobby.
void hook_LoadMethod(){
    const char * symbol = getClassLinkerLoadMethodSymbol();
    if (symbol == nullptr) {
        __android_log_print(6,"p1umh0","hook LoadMethod failed: symbol not found");
        return;
    }
    void * loadMethodAddress = DobbySymbolResolver(getClassLinkerLoadMethodLibPath(), symbol);
    if (loadMethodAddress == nullptr) {
        __android_log_print(6,"p1umh0","hook LoadMethod failed: address not resolved");
        return;
    }
    DobbyHook(loadMethodAddress, (void *) LoadMethod, (void **) &g_originLoadMethod);
    __android_log_print(6,"p1umh0","hook LoadMethod done");
}

// JNI entry point for MainActivity.stringFromJNI(): returns a fixed greeting
// as a new Java string.
extern "C" JNIEXPORT
jstring JNICALL Java_com_p1umh0_shell3shell_MainActivity_stringFromJNI(JNIEnv *env, jobject) {
    const std::string greeting{"Shell Application 3 in .so"};
    jstring result = env->NewStringUTF(greeting.c_str());
    return result;
}

加壳代码

packer,基本同一代整体壳的加壳代码,增加了对源程序dex方法代码的抽取

python 复制代码
import hashlib
import os
import pathlib
import struct
import zlib
from zipfile import ZipFile
from shell3dexparser import DexParser

# Paths: the source APK, the shell APK and the packed output APK all live next to this script
thisDir = os.path.dirname(__file__)
srcApkPath = os.path.join(thisDir, "shell3src.apk")
shellApkPath = os.path.join(thisDir, "shell3shell.apk")
newShellApkPath = os.path.join(thisDir, "shell3newshell.apk")

# Open the two input archives for reading and the output archive for writing
srcApk = ZipFile(srcApkPath, "r")
shellApk = ZipFile(shellApkPath, "r")
newShellApk = ZipFile(newShellApkPath, "w")

# Extract from the source APK: the res folder, resources.arsc, the lib folder and every .dex file
# (startswith("res") matches both "res/..." and "resources.arsc")
srcApkUnzipTempDir = os.path.join(thisDir, "srcApkUnzipTempDir")
for srcFullName in srcApk.namelist():
    if srcFullName.startswith("res") or srcFullName.startswith("lib") or srcFullName.endswith(".dex"):
        srcApk.extract(srcFullName, srcApkUnzipTempDir)

# Extract from the shell APK: AndroidManifest.xml, classes.dex and the lib folder
shellApkUnzipTempDir = os.path.join(thisDir, "shellApkUnzipTempDir")
for shellFullName in shellApk.namelist():
    if shellFullName == "AndroidManifest.xml" or shellFullName == "classes.dex" or shellFullName.startswith("lib"):
        shellApk.extract(shellFullName, shellApkUnzipTempDir)

# Add to the new shell APK: the source APK's res folder, resources.arsc and lib folder
# (every extracted file except the .dex files, which are embedded separately below)
srcApkUnzipTempDirIns = pathlib.Path(srcApkUnzipTempDir)
for srcResFilePath in srcApkUnzipTempDirIns.rglob(r"*"):
    if os.path.isfile(srcResFilePath) and not srcResFilePath.name.endswith(".dex"):
        newShellApk.write(
            srcResFilePath, srcResFilePath.relative_to(srcApkUnzipTempDirIns))

# Add to the new shell APK: the shell APK's AndroidManifest.xml and lib folder
# (classes.dex is skipped here because a rebuilt one is written below)
shellApkUnzipTempDirIns = pathlib.Path(shellApkUnzipTempDir)
for shellResFilePath in shellApkUnzipTempDirIns.rglob(r"*"):
    if os.path.isfile(shellResFilePath) and not shellResFilePath.name == "classes.dex":
        newShellApk.write(shellResFilePath, shellResFilePath.relative_to(
            shellApkUnzipTempDirIns))

# Concatenate the shell APK's classes.dex with every .dex file of the source APK.
# Layout of the result:
#   shell dex data
#   number of source .dex files (2 bytes)
#   source dex 1: name length (2 bytes) + name (variable) + data length (4 bytes) + data (variable)
#   source dex N: name length (2 bytes) + name (variable) + data length (4 bytes) + data (variable)
#   length of everything after the shell dex data (4 bytes)

# Data of the new shell APK's classes.dex
newShellDexData = b""
# Start with the shell APK's classes.dex
with open(os.path.join(shellApkUnzipTempDir, "classes.dex"), "rb") as f:
    newShellDexData += f.read()
# Length of the shell dex data; also the offset at which the appended payload begins
shellDexDataLen = len(newShellDexData)
# Number of source .dex files: 2 placeholder bytes, overwritten once the count is known
newShellDexData += b"??"
srcDexFileNum = 0
# Accumulated extracted code of all source .dex files
srcDexFileDataExtractedAll = b""
# Append every source .dex file
for srcDexFilePath in srcApkUnzipTempDirIns.rglob(r"*"):
    if os.path.isfile(srcDexFilePath) and srcDexFilePath.name.endswith(".dex"):
        srcDexFileNum += 1
        srcDexFileRelaPath = srcDexFilePath.relative_to(
            srcApkUnzipTempDirIns).name.encode()
        # Big-endian short, to match Java's DataInputStream.readShort()
        newShellDexData += struct.pack(">H", len(srcDexFileRelaPath))
        newShellDexData += srcDexFileRelaPath
        with open(srcDexFilePath, "rb") as f:
            srcDexFileData = f.read()
        # Use DexParser to extract the instructions, which yields:
        # 1. the extracted data (srcDexFileDataExtracted; its record layout is defined by DexParser.extract())
        # 2. the .dex data after extraction (srcDexFileDataPatched, with checksum and signature repaired)
        dexParser = DexParser(srcDexFileData)
        srcDexFileDataExtracted = dexParser.extract()
        srcDexFileDataExtractedAll += srcDexFileDataExtracted
        srcDexFileDataPatched = dexParser.patch()
        # Big-endian int, to match Java's DataInputStream.readInt()
        newShellDexData += struct.pack(">I", len(srcDexFileDataPatched))
        newShellDexData += srcDexFileDataPatched
# Length of everything after the shell dex data; big-endian int, to match Java's DataInputStream.readInt()
newShellDexData += struct.pack(">I", len(newShellDexData)-shellDexDataLen)

# Convert bytes to a list so that slices can be assigned in place
newShellDexData = list(newShellDexData)

# Fill in the number of source .dex files; big-endian short, to match Java's DataInputStream.readShort()
newShellDexData[shellDexDataLen:shellDexDataLen +
                2] = list(struct.pack(">H", srcDexFileNum))

# Repair file_size (offset 32) of the new classes.dex; little-endian int, as the dex format requires
newFileSize = list(struct.pack("<I", len(newShellDexData)))
newShellDexData[32:32+len(newFileSize)] = newFileSize

# Repair signature (offset 12): SHA-1 over everything from offset 32 on, stored as raw digest bytes
newSignature = hashlib.sha1(bytes(newShellDexData[32:])).hexdigest()
newSignature = list(bytes.fromhex(newSignature))
newShellDexData[12:12+len(newSignature)] = newSignature

# Repair checksum (offset 8): Adler-32 over everything from offset 12 on; little-endian int, as the dex format requires
newChecksum = zlib.adler32(bytes(newShellDexData[12:]))
newChecksum = list(struct.pack("<I", newChecksum))
newShellDexData[8:8+len(newChecksum)] = newChecksum

# Add to the new shell APK: the rebuilt classes.dex
with open(os.path.join(thisDir, "classes.dex"), "wb") as f:
    f.write(bytes(newShellDexData))
newShellApk.write(os.path.join(thisDir, "classes.dex"), "classes.dex")

# Add to the new shell APK: assets/srcapkcode, holding the code extracted from the source .dex files
with open(os.path.join(thisDir, "srcapkcode"), "wb") as f:
    f.write(srcDexFileDataExtractedAll)
newShellApk.write(os.path.join(thisDir, "srcapkcode"), "assets/srcapkcode")

# Close all three archives; closing the output archive finalizes it on disk
srcApk.close()
shellApk.close()
newShellApk.close()

dexparser

主要解析了字符串表、类型表、原型表、方法表和类定义表

在抽取方法代码时没有保留方法在方法表中的索引method_idx,而是保留了一个全局方法序号

因为encoded_method结构中实际上是method_idx_diff,它是一个方法表索引差值

如果想要得到某个方法真实的method_idx,就需要解析所属类的其他方法的encoded_method结构,非常麻烦

笔者这里用一个全局方法序号去标识所抽取的方法代码,在回填代码时只需找这个全局方法序号即可

全局方法序号保存在返回指令后,可以通过Verifier对Method的验证,只是要求原方法指令数量足够多

python 复制代码
import hashlib
import os
import struct
import zlib


# Directory next to this module named "srcApkUnzipTempDir"
BASEPATH = os.path.join(os.path.dirname(__file__), "srcApkUnzipTempDir")
# File paths inside BASEPATH: classes.dex, classes_code and classes_patched.dex
DEXPATH = os.path.join(BASEPATH, "classes.dex")
DEXCODEPATH = os.path.join(BASEPATH, "classes_code")
DEXPATCHPATH = os.path.join(BASEPATH, "classes_patched.dex")
# Type descriptors of the classes that DexParser.parseClassList keeps; all other classes are skipped
CLASSNAMELIST = [b"Lcom/p1umh0/shell3src/MainActivity;"]


class DexClass:
    """Plain record describing one class kept by the parser.

    The two method lists start out as None and are assigned by the parser
    after the object has been created.
    """

    def __init__(self, class_name, class_direct_methods_size, class_virtual_methods_size) -> None:
        # Class name
        self.class_name = class_name
        # Direct methods: declared count, then the list (assigned later)
        self.class_direct_methods_size = class_direct_methods_size
        self.class_direct_methods_list = None
        # Virtual methods: declared count, then the list (assigned later)
        self.class_virtual_methods_size = class_virtual_methods_size
        self.class_virtual_methods_list = None


class DexMethod:
    """Plain record describing one method whose code is extracted.

    The code bytes, the return stub and the sequence number start out as
    None and are assigned by the parser after the object has been created.
    """

    def __init__(self, method_idx, code_insns_off, code_insns_size) -> None:
        # Index into the method table
        self.method_idx = method_idx
        # Offset of the method's instructions within the whole dex file
        self.code_insns_off = code_insns_off
        # Length of the method's instructions
        self.code_insns_size = code_insns_size
        # The method's instructions (assigned later)
        self.code_insns_data = None
        # The return instructions for this method (assigned later)
        self.code_return_insns_data = None
        # 4-byte sequence number placed right after the return instructions (assigned later)
        self.code_number = None


class DexParser():
    def __init__(self, srcDexFileData: bytes) -> None:
        # 全局方法序号
        self.code_number_global = 1
        # dex文件数据
        self.srcDexFileData = srcDexFileData
        # 解析字符串表
        self.string_ids_size = -1
        self.string_ids_off = -1
        self.srcDexStringList = None
        self.parseStringList()
        # 解析类型表
        self.type_ids_size = -1
        self.type_ids_off = -1
        self.srcDexTypeList = None
        self.parseTypeList()
        # 解析原型表
        self.proto_ids_size = -1
        self.proto_ids_off = -1
        self.srcDexProtoList = None
        self.parseProtoList()
        # 解析方法表
        self.method_ids_size = -1
        self.method_ids_off = -1
        self.srcDexMethodList = None
        self.parseMethodList()
        # 解析类定义表
        self.class_defs_size = -1
        self.class_defs_off = -1
        self.srcDexClassList = None
        self.parseClassList()

    def parseStringList(self) -> None:
        # 字符串数量
        self.string_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x38:0x38+4])[0]
        # 字符串表偏移
        self.string_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x3C:0x3C+4])[0]
        # 创建字符串表
        self.srcDexStringList = [b""] * self.string_ids_size
        # 填充字符串表
        for i in range(self.string_ids_size):
            # 字符串数据偏移
            string_data_off = struct.unpack(
                "<I", self.srcDexFileData[self.string_ids_off+i*4:self.string_ids_off+(i+1)*4])[0]
            # 开头1~5字节表示字符串长度,最少1字节,最多5字节
            uleb128_max = list(
                self.srcDexFileData[string_data_off:string_data_off+5])
            # uleb128所用字节数以及所表示的值
            uleb128_used, uleb128_val = self.parseUleb128(uleb128_max)
            # 字符串真实数据偏移
            string_data_real_off = string_data_off + uleb128_used
            # 字符串长度,+1还包含了\x00
            string_data_real_len = uleb128_val + 1
            # 字符串真实数据
            string_data_real_data = self.srcDexFileData[string_data_real_off:
                                                        string_data_real_off+string_data_real_len]
            # 如果按照字符串长度取出的字符串真实数据以\x00结尾,说明该字符串正常,需要保留(去除末尾的\x00)
            if list(string_data_real_data)[-1] == 0:
                self.srcDexStringList[i] = string_data_real_data[:-1]

    def parseTypeList(self) -> None:
        # 类型数量
        self.type_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x40:0x40+4])[0]
        # 类型表偏移
        self.type_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x44:0x44+4])[0]
        # 创建类型表
        self.srcDexTypeList = [-1] * self.type_ids_size
        # 填充类型表
        for i in range(self.type_ids_size):
            # 描述符索引/字符串表索引
            descriptor_idx = struct.unpack(
                "<I", self.srcDexFileData[self.type_ids_off+i*4:self.type_ids_off+(i+1)*4])[0]
            self.srcDexTypeList[i] = descriptor_idx

    def parseProtoList(self) -> None:
        # 原型数量
        self.proto_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x48:0x48+4])[0]
        # 原型表偏移
        self.proto_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x4C:0x4C+4])[0]
        # 创建原型表
        self.srcDexProtoList = [None] * self.proto_ids_size
        # 填充原型表
        for i in range(self.proto_ids_size):
            proto_id_data = self.srcDexFileData[self.proto_ids_off +
                                                i*0xC:self.proto_ids_off+(i+1)*0xC]
            shorty_idx = struct.unpack("<I", proto_id_data[0:4])[0]
            return_type_idx = struct.unpack("<I", proto_id_data[4:8])[0]
            parameters_off = struct.unpack("<I", proto_id_data[8:12])[0]
            self.srcDexProtoList[i] = (
                shorty_idx, return_type_idx, parameters_off)

    def parseMethodList(self) -> None:
        # 方法数量
        self.method_ids_size = struct.unpack(
            "<I", self.srcDexFileData[0x58:0x58+4])[0]
        # 方法表偏移
        self.method_ids_off = struct.unpack(
            "<I", self.srcDexFileData[0x5C:0x5C+4])[0]
        # 创建方法表
        self.srcDexMethodList = [None] * self.method_ids_size
        # 填充方法表
        for i in range(self.method_ids_size):
            method_id_data = self.srcDexFileData[self.method_ids_off +
                                                 i*8:self.method_ids_off+(i+1)*8]
            class_idx = struct.unpack("<H", method_id_data[0:2])[0]
            proto_idx = struct.unpack("<H", method_id_data[2:4])[0]
            name_idx = struct.unpack("<I", method_id_data[4:8])[0]
            self.srcDexMethodList[i] = (class_idx, proto_idx, name_idx)

    def parseClassList(self) -> None:
        # 类定义数量
        self.class_defs_size = struct.unpack(
            "<I", self.srcDexFileData[0x60:0x60+4])[0]
        # 类定义表偏移
        self.class_defs_off = struct.unpack(
            "<I", self.srcDexFileData[0x64:0x64+4])[0]
        # 创建类定义表
        self.srcDexClassList = []
        # 填充类定义表
        for i in range(self.class_defs_size):
            # 类定义的结构体数据
            class_def_data = self.srcDexFileData[self.class_defs_off +
                                                 i*0x20:self.class_defs_off+(i+1)*0x20]
            # 类名称索引/类型表索引
            class_idx = struct.unpack("<I", class_def_data[:4])[0]
            # 类名称
            class_name = self.srcDexStringList[self.srcDexTypeList[class_idx]]
            # 保留目标类
            if class_name in CLASSNAMELIST:
                # 类数据偏移
                class_data_off = struct.unpack(
                    "<I", class_def_data[0x18:0x18+4])[0]
                # 几个size都是uleb128类型,依次处理
                uleb128_used_all = 0
                # static_fields_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, static_fields_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # instance_fields_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, instance_fields_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # direct_methods_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, direct_methods_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # virtual_methods_size
                uleb128_max = list(
                    self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                uleb128_used, virtual_methods_size = self.parseUleb128(
                    uleb128_max)
                uleb128_used_all += uleb128_used
                # 创建类
                dexClass = DexClass(
                    class_name, direct_methods_size, virtual_methods_size)
                # 抽取直接方法
                class_direct_methods_list = []
                method_idx_prev = -1
                for i in range(direct_methods_size):
                    # 3个uleb128,依次处理
                    # method_idx_diff
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, method_idx_diff = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # access_flags
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, access_flags = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # code_off
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, code_off = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # 处理方法表索引method_idx
                    if method_idx_prev == -1:
                        method_idx_prev = method_idx_diff
                        method_idx = method_idx_diff
                    else:
                        method_idx = method_idx_prev + method_idx_diff
                        method_idx_prev = method_idx_prev + method_idx_diff
                    # 创建方法,只抽取ACC_PUBLIC(0x1)、ACC_PRIVATE(0x2)、ACC_PROTECTED(0x4)
                    if code_off != 0 and access_flags in [0x1, 0x2, 0x4]:
                        # 方法返回类型
                        code_return_type = self.srcDexStringList[self.srcDexTypeList[
                            self.srcDexProtoList[self.srcDexMethodList[method_idx][1]][1]]]
                        # 根据方法返回类型获取方法返回指令字节码
                        code_return_bytes = self.getCodeReturnBytes(
                            code_return_type)
                        # 方法代码长度
                        code_insns_size = struct.unpack(
                            "<I", self.srcDexFileData[code_off+12:code_off+12+4])[0]
                        # +4是为了存储方法序号
                        if len(code_return_bytes) + 4 <= code_insns_size * 2:
                            # 方法代码全局偏移
                            code_insns_off = code_off + 12 + 4
                            # 方法实例
                            dexMethod = DexMethod(
                                method_idx, code_insns_off, code_insns_size)
                            # 方法代码
                            code_insns_data = self.srcDexFileData[code_insns_off:code_insns_off+code_insns_size*2]
                            dexMethod.code_insns_data = code_insns_data
                            # 方法返回代码
                            dexMethod.code_return_insns_data = list(
                                code_return_bytes)
                            # 方法序号,直接保存为4字节
                            dexMethod.code_number = struct.pack(
                                "<I", self.code_number_global)
                            self.code_number_global += 1
                            class_direct_methods_list.append(dexMethod)
                dexClass.class_direct_methods_list = class_direct_methods_list
                # 抽取虚方法
                class_virtual_methods_list = []
                method_idx_prev = -1
                for i in range(virtual_methods_size):
                    # 3个uleb128,依次处理
                    # method_idx_diff
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, method_idx_diff = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # access_flags
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, access_flags = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # code_off
                    uleb128_max = list(
                        self.srcDexFileData[class_data_off+uleb128_used_all:class_data_off+uleb128_used_all+5])
                    uleb128_used, code_off = self.parseUleb128(
                        uleb128_max)
                    uleb128_used_all += uleb128_used
                    # 处理方法表索引method_idx
                    if method_idx_prev == -1:
                        method_idx_prev = method_idx_diff
                        method_idx = method_idx_diff
                    else:
                        method_idx = method_idx_prev + method_idx_diff
                        method_idx_prev = method_idx_prev + method_idx_diff
                    # 创建方法,只抽取ACC_PUBLIC(0x1)、ACC_PRIVATE(0x2)、ACC_PROTECTED(0x4)
                    if code_off != 0 and access_flags in [0x1, 0x2, 0x4]:
                        # 方法返回类型
                        code_return_type = self.srcDexStringList[self.srcDexTypeList[
                            self.srcDexProtoList[self.srcDexMethodList[method_idx][1]][1]]]
                        # 根据方法返回类型获取方法返回指令字节码
                        code_return_bytes = self.getCodeReturnBytes(
                            code_return_type)
                        # 方法代码长度
                        code_insns_size = struct.unpack(
                            "<I", self.srcDexFileData[code_off+12:code_off+12+4])[0]
                        # +4是为了存储方法序号
                        if len(code_return_bytes) + 4 <= code_insns_size * 2:
                            # 方法代码全局偏移
                            code_insns_off = code_off + 12 + 4
                            # 方法实例
                            dexMethod = DexMethod(
                                method_idx, code_insns_off, code_insns_size)
                            # 方法代码
                            code_insns_data = self.srcDexFileData[code_insns_off:code_insns_off+code_insns_size*2]
                            dexMethod.code_insns_data = code_insns_data
                            # 方法返回代码
                            dexMethod.code_return_insns_data = list(
                                code_return_bytes)
                            # 方法序号,直接保存为4字节
                            dexMethod.code_number = struct.pack(
                                "<I", self.code_number_global)
                            self.code_number_global += 1
                            class_virtual_methods_list.append(dexMethod)
                dexClass.class_virtual_methods_list = class_virtual_methods_list
                # 添加类
                self.srcDexClassList.append(dexClass)

    def parseUleb128(self, uleb128_max: list):
        """Decode one unsigned LEB128 value from the front of ``uleb128_max``.

        Every encoded byte carries 7 payload bits, least-significant group
        first. Bit 0x80 of a byte is the continuation flag: when it is clear,
        that byte is the last one of the value.

        Args:
            uleb128_max: Candidate bytes as integers. Callers in this class
                pass a slice of up to 5 bytes that starts at the value.

        Returns:
            A ``(uleb128_used, uleb128_val)`` tuple: how many bytes were
            consumed and the decoded value.

        Raises:
            IndexError: If the input runs out before a terminating byte
                (continuation flag clear) or the 5-byte limit is reached.
        """
        # Number of bytes consumed so far.
        uleb128_used = 0
        # Value accumulated so far.
        uleb128_val = 0
        while True:
            # At least one byte is always read.
            value = uleb128_max[uleb128_used]
            # Byte k contributes its low 7 bits at bit position 7 * k.
            uleb128_val |= (value & 0x7F) << (uleb128_used * 7)
            uleb128_used += 1
            # Stop on the first byte without the continuation flag. The old
            # test `value < 0x7F` wrongly kept reading after a 0x7F byte.
            # The encoding uses at most 5 bytes (the old limit of 4 cut
            # 5-byte values short).
            if (value & 0x80) == 0 or uleb128_used == 5:
                break
        return uleb128_used, uleb128_val

    def getCodeReturnBytes(self, code_return_type: bytes) -> bytes:
        """Choose the return stub that matches a method's return type.

        Args:
            code_return_type: Type descriptor of the method's return type as
                bytes, e.g. ``b"V"``, ``b"I"`` or ``b"Ljava/lang/String;"``.

        Returns:
            The stub bytecode: 2 bytes for void, 6 bytes for the wide types
            (``D``/``J``), and 4 bytes for the other primitives and for
            everything else (treated as an object/array reference).
        """
        # void: return-void
        if code_return_type in (b"V",):
            return b"\x0e\x00"
        # 32-bit primitives: const/4 v0, 0 ; return v0
        if code_return_type in (b"B", b"C", b"F", b"I", b"S", b"Z"):
            return b"\x12\x00\x0f\x00"
        # 64-bit primitives: const-wide/16 v0, 0 ; return-wide v0
        if code_return_type in (b"D", b"J"):
            return b"\x16\x00\x00\x00\x10\x00"
        # Any other descriptor: const/4 v0, 0 ; return-object v0
        return b"\x12\x00\x11\x00"

    def extract(self) -> bytes:
        """Serialize the code of every extracted method into a single blob.

        Record layout, repeated once per method (integers little-endian):
            code_number       4 bytes, the method's global sequence number
            code byte length  4 bytes, ``code_insns_size * 2``; stored already
                              multiplied so the reader need not scale it
            code_insns_data   the method's original instruction bytes

        Within a class, direct methods are written before virtual methods.

        Returns:
            The concatenated records, or ``b""`` when there is nothing to emit.
        """
        # Collect the pieces and join once at the end: growing an immutable
        # bytes object with ``+=`` copies everything accumulated so far on
        # each append.
        chunks = []

        def append_records(methods):
            # Emit one record per DexMethod; anything else is skipped.
            for dexMethod in methods:
                if isinstance(dexMethod, DexMethod):
                    chunks.append(dexMethod.code_number)
                    chunks.append(struct.pack(
                        "<I", dexMethod.code_insns_size * 2))
                    chunks.append(dexMethod.code_insns_data)

        for dexClass in self.srcDexClassList:
            if isinstance(dexClass, DexClass):
                if dexClass.class_direct_methods_size > 0:
                    append_records(dexClass.class_direct_methods_list)
                if dexClass.class_virtual_methods_size > 0:
                    append_records(dexClass.class_virtual_methods_list)
        return b"".join(chunks)

    def patch(self) -> bytes:
        """Build a copy of the source dex with extracted method bodies stubbed.

        For each extracted method, the instruction area is first zeroed
        (0x00 is nop), then the return stub is written at its start, and the
        method's 4-byte sequence number is placed directly after the stub.
        Afterwards the header is repaired: the SHA-1 of everything from
        offset 32 is stored at offset 12, and then the Adler-32 of everything
        from offset 12 is stored little-endian at offset 8.

        Returns:
            The patched dex contents; ``self.srcDexFileData`` is not modified.
        """
        patched = bytearray(self.srcDexFileData)

        def stub_out(methods):
            for method in methods:
                if not isinstance(method, DexMethod):
                    continue
                start = method.code_insns_off
                byte_len = method.code_insns_size * 2
                stub = method.code_return_insns_data
                # Blank the whole body with nop bytes.
                patched[start:start + byte_len] = bytes(byte_len)
                # Return stub goes at the very beginning of the body.
                patched[start:start + len(stub)] = stub
                # Sequence number sits immediately behind the stub.
                number_at = start + len(stub)
                patched[number_at:number_at + 4] = method.code_number

        for dex_class in self.srcDexClassList:
            if not isinstance(dex_class, DexClass):
                continue
            if dex_class.class_direct_methods_size > 0:
                stub_out(dex_class.class_direct_methods_list)
            if dex_class.class_virtual_methods_size > 0:
                stub_out(dex_class.class_virtual_methods_list)

        # Signature: raw 20-byte SHA-1 digest, written as-is (no byte order).
        signature = hashlib.sha1(bytes(patched[32:])).digest()
        patched[12:12 + len(signature)] = signature
        # Checksum: little-endian uint32, computed after the signature is in
        # place because the signature bytes are part of the checksummed range.
        checksum = struct.pack("<I", zlib.adler32(bytes(patched[12:])))
        patched[8:8 + len(checksum)] = checksum
        return bytes(patched)

    def extract_old(self) -> bytes:
        """Serialize extracted method code using the offset-based record layout.

        Judging by its name, this is the earlier format kept next to
        ``extract``. Record layout, repeated once per method (integers
        little-endian):
            method_idx       4 bytes
            code_insns_off   4 bytes, offset of the instructions in the dex
            code_insns_size  4 bytes, in 16-bit code units (NOT multiplied by 2)
            code_insns_data  ``code_insns_size * 2`` bytes of instructions

        Within a class, direct methods are written before virtual methods.

        Returns:
            The concatenated records, or ``b""`` when there is nothing to emit.
        """
        # Collect the pieces and join once at the end: growing an immutable
        # bytes object with ``+=`` copies everything accumulated so far on
        # each append.
        chunks = []

        def append_records(methods):
            # Emit one record per DexMethod; anything else is skipped.
            for dexMethod in methods:
                if isinstance(dexMethod, DexMethod):
                    # "<III" has no padding, so this equals three "<I" packs.
                    chunks.append(struct.pack(
                        "<III",
                        dexMethod.method_idx,
                        dexMethod.code_insns_off,
                        dexMethod.code_insns_size))
                    chunks.append(dexMethod.code_insns_data)

        for dexClass in self.srcDexClassList:
            if isinstance(dexClass, DexClass):
                if dexClass.class_direct_methods_size > 0:
                    append_records(dexClass.class_direct_methods_list)
                if dexClass.class_virtual_methods_size > 0:
                    append_records(dexClass.class_virtual_methods_list)
        return b"".join(chunks)

    def patch_old(self) -> bytes:
        """Build a copy of the source dex with extracted method bodies stubbed.

        Variant without the embedded sequence number: for each extracted
        method the instruction area is zeroed (0x00 is nop) and the return
        stub is written at its start. Afterwards the header is repaired: the
        SHA-1 of everything from offset 32 is stored at offset 12, and then
        the Adler-32 of everything from offset 12 is stored little-endian at
        offset 8.

        Returns:
            The patched dex contents; ``self.srcDexFileData`` is not modified.
        """
        patched = bytearray(self.srcDexFileData)

        def stub_out(methods):
            for method in methods:
                if not isinstance(method, DexMethod):
                    continue
                start = method.code_insns_off
                byte_len = method.code_insns_size * 2
                stub = method.code_return_insns_data
                # Blank the whole body with nop bytes.
                patched[start:start + byte_len] = bytes(byte_len)
                # Return stub goes at the very beginning of the body.
                patched[start:start + len(stub)] = stub

        for dex_class in self.srcDexClassList:
            if not isinstance(dex_class, DexClass):
                continue
            if dex_class.class_direct_methods_size > 0:
                stub_out(dex_class.class_direct_methods_list)
            if dex_class.class_virtual_methods_size > 0:
                stub_out(dex_class.class_virtual_methods_list)

        # Signature: raw 20-byte SHA-1 digest, written as-is (no byte order).
        signature = hashlib.sha1(bytes(patched[32:])).digest()
        patched[12:12 + len(signature)] = signature
        # Checksum: little-endian uint32, computed after the signature is in
        # place because the signature bytes are part of the checksummed range.
        checksum = struct.pack("<I", zlib.adler32(bytes(patched[12:])))
        patched[8:8 + len(checksum)] = checksum
        return bytes(patched)


# with open(DEXPATH, "rb") as f:
#     srcDexFileData = f.read()
# dexParser = DexParser(srcDexFileData)

# with open(DEXCODEPATH, "wb") as f:
#     f.write(dexParser.extract())

# with open(DEXPATCHPATH, "wb") as f:
#     f.write(dexParser.patch())

尾声

笔者用了一个全局方法序号去标识所抽取的方法代码,从而避免在so层对dex文件的重新解析(其实就是偷懒了)

这里猜测dpt-shell项目的作者前移hook点是为了适配多种安卓版本

笔者这里没有考虑版本兼容性的问题,只是在安卓13系统上的简单实践,欢迎师傅们讨论~

相关推荐
C4rpeDime1 小时前
自建MD5解密平台-续
android
鲤籽鲲3 小时前
C# Random 随机数 全面解析
android·java·c#
蜜獾云4 小时前
linux firewalld 命令详解
linux·运维·服务器·网络·windows·网络安全·firewalld
m0_548514777 小时前
2024.12.10——攻防世界Web_php_include
android·前端·php
凤邪摩羯7 小时前
Android-性能优化-03-启动优化-启动耗时
android
凤邪摩羯7 小时前
Android-性能优化-02-内存优化-LeakCanary原理解析
android
喀什酱豆腐8 小时前
Handle
android
m0_748232929 小时前
Android Https和WebView
android·网络协议·https
m0_748251729 小时前
Android webview 打开本地H5项目(Cocos游戏以及Unity游戏)
android·游戏·unity