Android二代抽取壳简易实现和踩坑记录
参考资料
[1] dpt-shell
[2] Android函数抽取壳的实现
[3] dpt-shell抽取壳项目源码及其逆向分析
整体思路
1、在向壳程序dex末尾追加源程序所有dex时,抽取方法代码到资源文件
2、在壳程序Application的attachBaseContext方法中解压出Apk内arm64-v8a架构的所有so,并通过System.load()加载壳so
3、壳so的初始函数_init实现对execve、mmap以及LoadMethod的hook
源程序
源程序基本同一代整体壳中的源程序,没有特别需要说明的地方
壳程序
ShellApplication,相较于一代整体壳中的代码,增加了两个方法,而且结构非常像,只是解压出的目录不同
java
/**
 * Extracts the arm64-v8a native libraries of an APK into {@code privatelibspath}.
 *
 * Only entries under {@code lib/} whose name ends in {@code .so} and contains
 * {@code arm64-v8a} are extracted. A library that already exists on disk is
 * left untouched (createNewFile() returns false for it).
 *
 * @param apkabspath absolute path of the APK (zip) file to read
 * @throws RuntimeException wrapping any IOException raised while reading or writing
 */
private void extractsofiles(String apkabspath){
    // try-with-resources closes the zip stream on every path, including failures
    try (ZipInputStream zipInputStream = new ZipInputStream(
            new BufferedInputStream(new FileInputStream(apkabspath)))) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryname = entry.getName();
            // Extract lib/arm64-v8a/lib*.so
            if (entryname.startsWith("lib/") && entryname.endsWith(".so") && entryname.contains("arm64-v8a")) {
                File libfile = new File(privatelibspath + File.separator + entryname.substring(entryname.lastIndexOf('/')));
                if (libfile.createNewFile()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(libfile)) {
                        byte[] bytes = new byte[1024];
                        int length;
                        while ((length = zipInputStream.read(bytes)) != -1) {
                            // Write only the bytes actually read; writing the whole
                            // buffer appends stale data after every short read.
                            fileOutputStream.write(bytes, 0, length);
                        }
                    }
                }
            }
            zipInputStream.closeEntry();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
/**
 * Extracts the extracted-method-code asset of an APK into {@code privateodexpath}.
 *
 * Only entries whose name starts with {@code assets} and contains
 * {@code extractedcodefile} are extracted. A file that already exists on disk
 * is left untouched (createNewFile() returns false for it).
 *
 * @param apkabspath absolute path of the APK (zip) file to read
 * @throws RuntimeException wrapping any IOException raised while reading or writing
 */
private void extractcodefile(String apkabspath) {
    // try-with-resources closes the zip stream on every path, including failures
    try (ZipInputStream zipInputStream = new ZipInputStream(
            new BufferedInputStream(new FileInputStream(apkabspath)))) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryname = entry.getName();
            // Extract assets/<extractedcodefile>
            if (entryname.startsWith("assets") && entryname.contains(extractedcodefile)) {
                File libfile = new File(privateodexpath + File.separator + entryname.substring(entryname.lastIndexOf('/')));
                if (libfile.createNewFile()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(libfile)) {
                        byte[] bytes = new byte[1024];
                        int length;
                        while ((length = zipInputStream.read(bytes)) != -1) {
                            // Write only the bytes actually read; writing the whole
                            // buffer would pad the code file with stale data.
                            fileOutputStream.write(bytes, 0, length);
                        }
                    }
                }
            }
            zipInputStream.closeEntry();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
壳so
hook_DefineClass最初参考了[1]中较新的代码,其思路是把hook点前移到DefineClass
后来发现之前[2]中hook LoadMethod的思路更加简单,于是有了hook_LoadMethod
bhook的使用参考bhook,前两步必做
Dobby的使用参考CMakeLists.txt,静态导入(原Dobby项目编译不出来,说是少个头文件)
cpp
#include <jni.h>
#include <dlfcn.h>
#include <elf.h>
#include <stdlib.h>
#include <unistd.h>
#include <cerrno>
#include <cstdarg>
#include <cstring>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include "android/log.h"
#include "sys/mman.h"
#include "bytehook.h"
#include "Dobby/include/dobby.h"
#include "DexFile.h"
// Device API level; read by the helpers below to pick library names/paths and the DefineClass signature
int apiLevel;
// Forward declarations of the hook installers
void hook();
void hook_execve();
void hook_mmap();
void hook_DefineClass();
void hook_LoadMethod();
// Name of the extracted-code file; it is looked up in the same directory as the source .dex
std::string extractedcodefile = "srcapkcode";
// Set once the extracted-code file has been parsed, so that it is parsed only once
bool extractedcodefileflag = false;
// Extracted code lookup: method sequence number -> extracted code object
std::map<uint32_t , CodeItem*> codemap;
// Receives the original DefineClass entry (7-argument form) when hook_DefineClass installs DefineClassV22
static void* (*g_originDefineClassV22)(void* thiz,
void* self,
const char* descriptor,
size_t hash,
void* class_loader,
const void* dex_file,
const void* dex_class_def);
// Receives the original DefineClass entry (5-argument form) when hook_DefineClass installs DefineClassV21
static void* (*g_originDefineClassV21)(void* thiz,
const char* descriptor,
void* class_loader,
const void* dex_file,
const void* dex_class_def);
// Receives the original LoadMethod entry when hook_LoadMethod installs LoadMethod
static void (*g_originLoadMethod)(void* thiz,
const DexFile* dex_file,
ClassAccessor::Method* method,
void* klass,
void* dst);
// Library initializer: caches the device API level, then installs all hooks.
extern "C" void _init() {
    apiLevel = android_get_device_api_level();
    hook();
}
// hook
void hook(){
bytehook_init(BYTEHOOK_MODE_AUTOMATIC, false);
hook_execve();
hook_mmap();
// hook_DefineClass();
hook_LoadMethod();
}
// Name of the library whose libc imports are hooked:
// libartbase.so from API level 29 on, libart.so before that.
const char * getArtLibName() {
    return apiLevel >= 29 ? "libartbase.so" : "libart.so";
}
// execve replacement: refuses to launch anything whose path contains "dex2oat"
// (fails with EACCES) and forwards every other call to the previous handler.
int fake_execve(const char *pathname, char *const argv[], char *const envp[]) {
    BYTEHOOK_STACK_SCOPE();
    const bool isDex2oat = strstr(pathname, "dex2oat") != nullptr;
    if (!isDex2oat) {
        return BYTEHOOK_CALL_PREV(fake_execve, pathname, argv, envp);
    }
    // Disable dex2oat
    errno = EACCES;
    return -1;
}
void hook_execve(){
bytehook_stub_t stub = bytehook_hook_single(
getArtLibName(),
"libc.so",
"execve",
(void *) fake_execve,
nullptr,
nullptr);
if (stub != nullptr) {
__android_log_print(6,"p1umh0","hook execve done");
}
}
// mmap replacement: any mapping requested readable but not writable gets
// PROT_WRITE added, then the call is forwarded to the previous handler.
void* fake_mmap(void * __addr, size_t __size, int __prot, int __flags, int __fd, off_t __offset){
    BYTEHOOK_STACK_SCOPE();
    const bool readable = (__prot & PROT_READ) != 0;
    const bool writable = (__prot & PROT_WRITE) != 0;
    // Add write permission
    const int patchedProt = (readable && !writable) ? (__prot | PROT_WRITE) : __prot;
    return BYTEHOOK_CALL_PREV(fake_mmap, __addr, __size, patchedProt, __flags, __fd, __offset);
}
void hook_mmap(){
bytehook_stub_t stub = bytehook_hook_single(
getArtLibName(),
"libc.so",
"mmap",
(void *) fake_mmap,
nullptr,
nullptr);
if(stub != nullptr){
__android_log_print(6,"p1umh0","hook mmap done");
}
}
// Absolute 64-bit path of libart.so for the current API level.
const char * getArtLibPath() {
    if (apiLevel > 29) {
        return "/apex/com.android.art/lib64/libart.so";
    }
    if (apiLevel == 29) {
        return "/apex/com.android.runtime/lib64/libart.so";
    }
    return "/system/lib64/libart.so";
}
// Absolute 64-bit path of libartbase.so: the com.android.runtime apex on
// API level 29, the com.android.art apex on every other level.
const char * getArtBaseLibPath() {
    return apiLevel == 29
           ? "/apex/com.android.runtime/lib64/libartbase.so"
           : "/apex/com.android.art/lib64/libartbase.so";
}
// Searches the first string-table section (SHT_STRTAB) of a 64-bit ELF file for
// the first non-empty string that contains ALL of the given keywords.
//
// elf_file       path of the ELF file to scan
// keyword_count  number of variadic arguments that follow
// ...            keyword_count `const char*` substrings that must all occur
//
// Returns a heap-allocated copy of the matching string (the caller may free()
// it), or nullptr when the file cannot be read, is malformed, or nothing matches.
// The file handle and the in-memory file image are released on every path;
// previously both were leaked whenever a match was found.
const char* find_symbol_in_elf_file(const char *elf_file,int keyword_count,...) {
    if (elf_file == nullptr) {
        return nullptr;
    }
    FILE *elf_fp = fopen(elf_file, "rb");
    if (elf_fp == nullptr) {
        return nullptr;
    }
    // Determine the file size
    long lib_size = -1;
    if (fseek(elf_fp, 0L, SEEK_END) == 0) {
        lib_size = ftell(elf_fp);
    }
    // Load the whole file; one extra zero byte guarantees that every string
    // scanned below is NUL-terminated even if the table itself is not.
    char *data = nullptr;
    bool loaded = false;
    if (lib_size >= (long) sizeof(Elf64_Ehdr) && fseek(elf_fp, 0L, SEEK_SET) == 0) {
        data = (char *) calloc((size_t) lib_size + 1, 1);
        loaded = data != nullptr
                 && fread(data, 1, (size_t) lib_size, elf_fp) == (size_t) lib_size;
    }
    fclose(elf_fp);

    const char *found = nullptr;
    if (loaded) {
        const size_t file_size = (size_t) lib_size;
        // ELF header
        const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *) data;
        const uint64_t table_bytes = (uint64_t) ehdr->e_shnum * sizeof(Elf64_Shdr);
        // Only walk the section headers if the whole table lies inside the file
        if (ehdr->e_shoff <= file_size && table_bytes <= file_size - ehdr->e_shoff) {
            const Elf64_Shdr *shdr = (const Elf64_Shdr *) (data + ehdr->e_shoff);
            for (int i = 0; i < ehdr->e_shnum; i++, shdr++) {
                if (shdr->sh_type != SHT_STRTAB) {
                    continue;
                }
                if (shdr->sh_offset <= file_size && shdr->sh_size <= file_size - shdr->sh_offset) {
                    const char *ptr = data + shdr->sh_offset;
                    const char *end = ptr + shdr->sh_size;
                    // Walk the NUL-separated strings of the table
                    while (ptr < end && found == nullptr) {
                        const char *item_value = ptr;
                        const size_t item_len = strnlen(item_value, (size_t) (end - ptr));
                        ptr += item_len + 1;
                        if (item_len == 0) {
                            continue;
                        }
                        int match_count = 0;
                        va_list kw_list;
                        va_start(kw_list, keyword_count);
                        for (int n = 0; n < keyword_count; n++) {
                            const char *keyword = va_arg(kw_list, const char*);
                            if (strstr(item_value, keyword)) {
                                match_count++;
                            }
                        }
                        va_end(kw_list);
                        if (match_count == keyword_count) {
                            // Copy the name out so the file image can be freed
                            found = strdup(item_value);
                        }
                    }
                }
                // Only the first string table is searched
                break;
            }
        }
    }
    free(data);
    return found;
}
// Library that is searched for the DefineClass symbol (libart.so).
const char * getClassLinkerDefineClassLibPath() {
    const char *artPath = getArtLibPath();
    return artPath;
}
// Looks up the symbol name containing both "ClassLinker" and "DefineClass";
// nullptr when no such name is found.
const char * getClassLinkerDefineClassSymbol() {
    return find_symbol_in_elf_file(getClassLinkerDefineClassLibPath(),
                                   2, "ClassLinker", "DefineClass");
}
// Replacement for the 7-argument DefineClass: forwards to the original entry
// unchanged, or returns nullptr if the original was never captured.
void *DefineClassV22(void* thiz,
                     void* self,
                     const char* descriptor,
                     size_t hash,
                     void* class_loader,
                     const void* dex_file,
                     const void* dex_class_def) {
    if (g_originDefineClassV22 == nullptr) {
        return nullptr;
    }
    // patchClass(descriptor,dex_file,dex_class_def);
    return g_originDefineClassV22(thiz, self, descriptor, hash, class_loader, dex_file, dex_class_def);
}
// Replacement for the 5-argument DefineClass: forwards to the original entry
// unchanged, or returns nullptr if the original was never captured.
void *DefineClassV21(void* thiz,
                     const char* descriptor,
                     void* class_loader,
                     const void* dex_file,
                     const void* dex_class_def) {
    if (g_originDefineClassV21 == nullptr) {
        return nullptr;
    }
    // patchClass(descriptor,dex_file,dex_class_def);
    return g_originDefineClassV21(thiz, descriptor, class_loader, dex_file, dex_class_def);
}
// Hooks DefineClass with Dobby, choosing the replacement whose signature
// matches the running API level (7 arguments from L_MR1 on, 5 before).
// A missing symbol, an unresolved address, or a failed hook is logged instead
// of being handed to Dobby or reported as "done".
void hook_DefineClass(){
    const char *symbol = getClassLinkerDefineClassSymbol();
    if (symbol == nullptr) {
        __android_log_print(6,"p1umh0","DefineClass symbol not found");
        return;
    }
    void * defineClassAddress = DobbySymbolResolver(getClassLinkerDefineClassLibPath(), symbol);
    if (defineClassAddress == nullptr) {
        __android_log_print(6,"p1umh0","DefineClass address not resolved");
        return;
    }
    if(apiLevel >= __ANDROID_API_L_MR1__) {
        // DobbyHook returns 0 on success
        if (DobbyHook(defineClassAddress, (void *) DefineClassV22, (void **) &g_originDefineClassV22) == 0) {
            __android_log_print(6,"p1umh0","hook DefineClassV22 done");
        } else {
            __android_log_print(6,"p1umh0","hook DefineClassV22 failed");
        }
    } else {
        if (DobbyHook(defineClassAddress, (void *) DefineClassV21, (void **) &g_originDefineClassV21) == 0) {
            __android_log_print(6,"p1umh0","hook DefineClassV21 done");
        } else {
            __android_log_print(6,"p1umh0","hook DefineClassV21 failed");
        }
    }
}
// Library that is searched for the LoadMethod symbol (libart.so).
const char * getClassLinkerLoadMethodLibPath() {
    const char *artPath = getArtLibPath();
    return artPath;
}
// Looks up the symbol name containing both "ClassLinker" and "LoadMethod";
// nullptr when no such name is found.
const char * getClassLinkerLoadMethodSymbol() {
    return find_symbol_in_elf_file(getClassLinkerLoadMethodLibPath(),
                                   2, "ClassLinker", "LoadMethod");
}
// Decodes a 4-byte little-endian unsigned integer from bytearr[0..3].
// Each byte is converted to uint8_t before being OR-ed in: where plain `char`
// is signed, a byte >= 0x80 would otherwise be sign-extended and overwrite
// the higher bytes of the result with ones.
uint32_t bytearr2uint32(char * bytearr){
    uint32_t retnum = 0;
    for (int i = 3; i >= 0; i--) {
        retnum = (retnum << 8) | static_cast<uint8_t>(bytearr[i]);
    }
    return retnum;
}
// 解析抽取代码文件
void parseextractedcodefile(std::string dexpath){
size_t dirlen = dexpath.find_last_of("/");
std::string codepath = dexpath.substr(0,dirlen+1) + extractedcodefile;
FILE * codefile = fopen(codepath.c_str(),"r");
fseek(codefile,0,SEEK_END);
uint32_t filelen = ftell(codefile);
__android_log_print(6, "p1umh0", "filelen => 0x%x", filelen);
fseek(codefile,0,SEEK_SET);
size_t off = 0;
while(off<filelen) {
fseek(codefile,0,off);
// 4字节方法序号
char codenumberstr[4];
fread(codenumberstr,1,4,codefile);
uint32_t codenumber = bytearr2uint32(codenumberstr);
if(codenumber==0){
break;
}
__android_log_print(6, "p1umh0", "codenumber => 0x%x", codenumber);
off += 4;
fseek(codefile,0,off);
// 4字节方法长度
char codelengthstr[4];
fread(codelengthstr,1,4,codefile);
uint32_t codelength = bytearr2uint32(codelengthstr);
if(codelength==0){
break;
}
__android_log_print(6, "p1umh0", "codelength => 0x%x", codelength);
off += 4;
fseek(codefile,0,off);
// codelength字节方法代码
char code[codelength];
fread(code,1,codelength,codefile);
// 创建抽取代码对象
CodeItem * codeItem = new CodeItem(codelength,code);
// 映射
codemap.insert(std::pair<int, CodeItem*>(codenumber, codeItem));
// 继续向后
off += codelength;
}
fclose(codefile);
}
// Restores the original instructions of one method before ART loads it.
//
// Only dex files whose location contains "app_myodex" are touched. A patched
// method starts with a return stub (return-void, const/4 + return, or
// const-wide/16 + return-wide) immediately followed by a 4-byte sequence
// number; that number selects the code to copy back from `codemap`.
//
// The parameters mirror the hooked LoadMethod; thiz, klass and dest are unused.
void innerLoadMethod(void* thiz, const DexFile* dex_file, ClassAccessor::Method* method, void* klass, void* dest){
    // Only handle dex files from the source-dex extraction directory
    std::string location = dex_file->location_;
    if(location.find("app_myodex") == std::string::npos){
        return;
    }
    if(!extractedcodefileflag){
        parseextractedcodefile(location);
        extractedcodefileflag = true;
    }
    // No code item for this method, nothing to restore
    if(method->code_off_==0){
        return;
    }
    // Address of the instructions: the code item starts with a 16-byte header
    uint8_t* codeAddr = dex_file->begin_ + method->code_off_ + 16;
    // First 2 bytes of the instructions. memcpy is used for every multi-byte
    // read below because the sequence number sits at a 2-byte-aligned address,
    // so dereferencing a uint32_t* there would be a misaligned access.
    uint16_t firstDvmCode = 0;
    memcpy(&firstDvmCode, codeAddr, sizeof(firstDvmCode));
    // The method must start with one of the return stubs; the stub's length
    // gives the offset of the sequence number
    size_t numberOffset = 0;
    switch (firstDvmCode) {
        case 0x000e:
            numberOffset = 2;
            break;
        case 0x0012:
            numberOffset = 4;
            break;
        case 0x0016:
            numberOffset = 6;
            break;
        default:
            return;
    }
    // 4-byte sequence number
    uint32_t codeNumber = 0;
    memcpy(&codeNumber, codeAddr + numberOffset, sizeof(codeNumber));
    if(codeNumber==0){
        return;
    }
    __android_log_print(6,"p1umh0","hooked codeNumber => 0x%x", codeNumber);
    std::map<uint32_t , CodeItem*>::iterator itr = codemap.find(codeNumber);
    if(itr==codemap.end()){
        return;
    }
    CodeItem * codeItem = itr->second;
    __android_log_print(6,"p1umh0","hooked codelength => 0x%x", codeItem->getlength());
    // insns_size (in 2-byte units) is the last 4 bytes of the code item header;
    // never copy more than the method's own instruction area can hold.
    uint32_t insnsSize = 0;
    memcpy(&insnsSize, codeAddr - 4, sizeof(insnsSize));
    if (static_cast<uint64_t>(codeItem->getlength()) > static_cast<uint64_t>(insnsSize) * 2) {
        __android_log_print(6,"p1umh0","hooked code does not fit, skipped");
        return;
    }
    memcpy(codeAddr,codeItem->getcode(),codeItem->getlength());
    __android_log_print(6,"p1umh0","hooked memcpy code success");
}
// Replacement for the hooked LoadMethod: restores the method's code first,
// then hands over to the original. Does nothing if the original entry was
// never captured.
void LoadMethod(void* thiz, const DexFile* dex_file, ClassAccessor::Method* method, void* klass, void* dest){
    if (g_originLoadMethod == nullptr) {
        return;
    }
    // Restore first, then call the original
    innerLoadMethod(thiz, dex_file, method, klass, dest);
    g_originLoadMethod(thiz, dex_file, method, klass, dest);
}
// Hooks LoadMethod with Dobby so that method code can be restored before it
// is loaded. A missing symbol, an unresolved address, or a failed hook is
// logged instead of being handed to Dobby or reported as "done".
void hook_LoadMethod(){
    const char *symbol = getClassLinkerLoadMethodSymbol();
    if (symbol == nullptr) {
        __android_log_print(6,"p1umh0","LoadMethod symbol not found");
        return;
    }
    void * loadMethodAddress = DobbySymbolResolver(getClassLinkerLoadMethodLibPath(), symbol);
    if (loadMethodAddress == nullptr) {
        __android_log_print(6,"p1umh0","LoadMethod address not resolved");
        return;
    }
    // DobbyHook returns 0 on success
    if (DobbyHook(loadMethodAddress, (void *) LoadMethod, (void **) &g_originLoadMethod) != 0) {
        __android_log_print(6,"p1umh0","hook LoadMethod failed");
        return;
    }
    __android_log_print(6,"p1umh0","hook LoadMethod done");
}
// JNI entry for MainActivity.stringFromJNI(): returns a fixed greeting string.
extern "C" JNIEXPORT
jstring JNICALL Java_com_p1umh0_shell3shell_MainActivity_stringFromJNI(JNIEnv *env, jobject) {
    const char *greeting = "Shell Application 3 in .so";
    return env->NewStringUTF(greeting);
}
加壳代码
packer,基本同一代整体壳的加壳代码,增加了对源程序dex方法代码的抽取
python
import hashlib
import os
import pathlib
import struct
import zlib
from zipfile import ZipFile
from shell3dexparser import DexParser
# Packer: builds shell3newshell.apk from the source APK and the shell APK.
# The new classes.dex is the shell dex followed by every (patched) source dex;
# the method code removed from the source dex files goes to assets/srcapkcode.

# Paths
thisDir = os.path.dirname(__file__)
srcApkPath = os.path.join(thisDir, "shell3src.apk")
shellApkPath = os.path.join(thisDir, "shell3shell.apk")
newShellApkPath = os.path.join(thisDir, "shell3newshell.apk")
srcApkUnzipTempDir = os.path.join(thisDir, "srcApkUnzipTempDir")
shellApkUnzipTempDir = os.path.join(thisDir, "shellApkUnzipTempDir")
srcApkUnzipTempDirIns = pathlib.Path(srcApkUnzipTempDir)
shellApkUnzipTempDirIns = pathlib.Path(shellApkUnzipTempDir)

# From the source APK extract the res folder, resources.arsc, the lib folder
# and every .dex file. The archive is closed even if extraction fails.
with ZipFile(srcApkPath, "r") as srcApk:
    for srcFullName in srcApk.namelist():
        if srcFullName.startswith(("res", "lib")) or srcFullName.endswith(".dex"):
            srcApk.extract(srcFullName, srcApkUnzipTempDir)

# From the shell APK extract AndroidManifest.xml, classes.dex and the lib folder
with ZipFile(shellApkPath, "r") as shellApk:
    for shellFullName in shellApk.namelist():
        if shellFullName in ("AndroidManifest.xml", "classes.dex") or shellFullName.startswith("lib"):
            shellApk.extract(shellFullName, shellApkUnzipTempDir)

# Layout of the new classes.dex:
#   shell dex data
#   number of source .dex files (2 bytes)
#   per source dex: name length (2 bytes) + name + data length (4 bytes) + data
#   length of everything after the shell dex data (4 bytes)
# A bytearray is used so the header fields can be patched in place.
newShellDexData = bytearray()
with open(os.path.join(shellApkUnzipTempDir, "classes.dex"), "rb") as f:
    newShellDexData += f.read()
# Length of the shell dex data
shellDexDataLen = len(newShellDexData)
# Placeholder for the number of source .dex files (2 bytes), filled in below
newShellDexData += b"??"
srcDexFileNum = 0
# Method code extracted from all source .dex files
srcDexFileDataExtractedAll = b""
# Append every source .dex file; sorted so the output does not depend on
# directory iteration order
for srcDexFilePath in sorted(srcApkUnzipTempDirIns.rglob(r"*")):
    if not (os.path.isfile(srcDexFilePath) and srcDexFilePath.name.endswith(".dex")):
        continue
    srcDexFileNum += 1
    srcDexFileRelaPath = srcDexFilePath.relative_to(
        srcApkUnzipTempDirIns).name.encode()
    # Big-endian short, to match Java's DataInputStream.readShort()
    newShellDexData += struct.pack(">H", len(srcDexFileRelaPath))
    newShellDexData += srcDexFileRelaPath
    with open(srcDexFilePath, "rb") as f:
        srcDexFileData = f.read()
    # DexParser yields:
    # 1. the extracted method code (extract())
    # 2. the dex with that code stubbed out and checksum/signature fixed (patch())
    dexParser = DexParser(srcDexFileData)
    srcDexFileDataExtractedAll += dexParser.extract()
    srcDexFileDataPatched = dexParser.patch()
    # Big-endian int, to match Java's DataInputStream.readInt()
    newShellDexData += struct.pack(">I", len(srcDexFileDataPatched))
    newShellDexData += srcDexFileDataPatched
# Length of everything after the shell dex data, big-endian int
newShellDexData += struct.pack(">I", len(newShellDexData) - shellDexDataLen)
# Fill in the number of source .dex files, big-endian short
newShellDexData[shellDexDataLen:shellDexDataLen + 2] = struct.pack(">H", srcDexFileNum)
# Fix file_size (offset 32), little-endian int as required by the dex format
newShellDexData[32:36] = struct.pack("<I", len(newShellDexData))
# Fix signature (offset 12): raw SHA-1 digest of everything after it
newShellDexData[12:32] = hashlib.sha1(bytes(newShellDexData[32:])).digest()
# Fix checksum (offset 8): Adler-32 of everything after it, little-endian int
newShellDexData[8:12] = struct.pack("<I", zlib.adler32(bytes(newShellDexData[12:])))

# Write the two generated files next to this script
with open(os.path.join(thisDir, "classes.dex"), "wb") as f:
    f.write(bytes(newShellDexData))
with open(os.path.join(thisDir, "srcapkcode"), "wb") as f:
    f.write(srcDexFileDataExtractedAll)

# Assemble the new shell APK; the archive is closed even if a write fails
with ZipFile(newShellApkPath, "w") as newShellApk:
    # Source APK: res folder, resources.arsc, lib folder (everything but .dex)
    for srcResFilePath in srcApkUnzipTempDirIns.rglob(r"*"):
        if os.path.isfile(srcResFilePath) and not srcResFilePath.name.endswith(".dex"):
            newShellApk.write(
                srcResFilePath, srcResFilePath.relative_to(srcApkUnzipTempDirIns))
    # Shell APK: AndroidManifest.xml and lib folder (everything but classes.dex)
    for shellResFilePath in shellApkUnzipTempDirIns.rglob(r"*"):
        if os.path.isfile(shellResFilePath) and shellResFilePath.name != "classes.dex":
            newShellApk.write(
                shellResFilePath, shellResFilePath.relative_to(shellApkUnzipTempDirIns))
    # The combined classes.dex
    newShellApk.write(os.path.join(thisDir, "classes.dex"), "classes.dex")
    # assets/srcapkcode: the method code extracted from the source .dex files
    newShellApk.write(os.path.join(thisDir, "srcapkcode"), "assets/srcapkcode")
dexparser
主要解析了字符串表、类型表、原型表、方法表和类定义表
在抽取方法代码时没有保留方法在方法表中的索引method_idx,而是保留了一个全局方法序号
因为encoded_method结构中实际上是method_idx_diff,它是一个方法表索引差值
如果想要得到某个方法真实的method_idx,就需要解析所属类的其他方法的encoded_method结构,非常麻烦
笔者这里用一个全局方法序号去标识所抽取的方法代码,在回填代码时只需找这个全局方法序号即可
全局方法序号保存在返回指令后,可以通过Verifier对Method的验证,只是要求原方法指令数量足够多
python
import hashlib
import os
import struct
import zlib
# Working directory "srcApkUnzipTempDir" next to this script
BASEPATH = os.path.join(os.path.dirname(__file__), "srcApkUnzipTempDir")
# Input dex, extracted-code output and patched-dex output paths; these are only
# referenced by the commented-out standalone driver at the end of this script
DEXPATH = os.path.join(BASEPATH, "classes.dex")
DEXCODEPATH = os.path.join(BASEPATH, "classes_code")
DEXPATCHPATH = os.path.join(BASEPATH, "classes_patched.dex")
# Descriptors of the classes whose methods are extracted
CLASSNAMELIST = [b"Lcom/p1umh0/shell3src/MainActivity;"]
class DexClass():
    """A class selected for extraction, with the methods taken from it."""

    def __init__(self, class_name, class_direct_methods_size, class_virtual_methods_size) -> None:
        # Class descriptor
        self.class_name = class_name
        # Number of direct methods declared by the class
        self.class_direct_methods_size = class_direct_methods_size
        # Extracted direct methods (list of DexMethod)
        self.class_direct_methods_list = None
        # Number of virtual methods declared by the class
        self.class_virtual_methods_size = class_virtual_methods_size
        # Extracted virtual methods (list of DexMethod)
        self.class_virtual_methods_list = None


class DexMethod():
    """One method whose instructions are extracted from the dex."""

    def __init__(self, method_idx, code_insns_off, code_insns_size) -> None:
        # Index into the method table
        self.method_idx = method_idx
        # File offset of the method's instructions
        self.code_insns_off = code_insns_off
        # Instruction count in 2-byte code units
        self.code_insns_size = code_insns_size
        # Original instruction bytes
        self.code_insns_data = None
        # Bytes of the return stub written in place of the instructions
        self.code_return_insns_data = None
        # Sequence number stored right after the return stub, 4 bytes
        self.code_number = None


class DexParser():
    """Parses a dex file and extracts the method code of selected classes.

    The string, type, proto, method and class-def tables are parsed on
    construction. ``extract()`` returns the removed method code and ``patch()``
    returns the dex with that code replaced by a return stub plus a sequence
    number.

    Sequence numbers start at 1 for every DexParser instance, so the outputs of
    several instances can contain the same number.
    """

    def __init__(self, srcDexFileData: bytes, classNameList=None) -> None:
        """
        :param srcDexFileData: raw bytes of the dex file
        :param classNameList: descriptors (bytes) of the classes to extract;
            defaults to the module-level CLASSNAMELIST
        """
        # Classes to extract
        self.classNameList = CLASSNAMELIST if classNameList is None else list(classNameList)
        # Next sequence number to hand out
        self.code_number_global = 1
        # Dex file data
        self.srcDexFileData = srcDexFileData
        # String table
        self.string_ids_size = -1
        self.string_ids_off = -1
        self.srcDexStringList = None
        self.parseStringList()
        # Type table
        self.type_ids_size = -1
        self.type_ids_off = -1
        self.srcDexTypeList = None
        self.parseTypeList()
        # Proto table
        self.proto_ids_size = -1
        self.proto_ids_off = -1
        self.srcDexProtoList = None
        self.parseProtoList()
        # Method table
        self.method_ids_size = -1
        self.method_ids_off = -1
        self.srcDexMethodList = None
        self.parseMethodList()
        # Class-def table
        self.class_defs_size = -1
        self.class_defs_off = -1
        self.srcDexClassList = None
        self.parseClassList()

    def _readU32(self, off: int) -> int:
        """Read a little-endian unsigned 32-bit value at file offset ``off``."""
        return struct.unpack_from("<I", self.srcDexFileData, off)[0]

    def _readUleb128(self, cursor: int):
        """Decode the uleb128 at ``cursor``; return (value, cursor after it)."""
        used, value = self.parseUleb128(
            list(self.srcDexFileData[cursor:cursor + 5]))
        return value, cursor + used

    def parseStringList(self) -> None:
        """Fill ``srcDexStringList`` with the raw bytes of every string."""
        self.string_ids_size = self._readU32(0x38)
        self.string_ids_off = self._readU32(0x3C)
        self.srcDexStringList = [b""] * self.string_ids_size
        for i in range(self.string_ids_size):
            string_data_off = self._readU32(self.string_ids_off + i * 4)
            # The leading uleb128 is the length in UTF-16 code units, which is
            # not the byte length for non-ASCII strings; it is only skipped.
            _, string_data_real_off = self._readUleb128(string_data_off)
            # String data is MUTF-8 terminated by a single 0x00 byte (MUTF-8
            # never contains 0x00 itself), so the terminator marks the end.
            string_data_end = self.srcDexFileData.find(
                b"\x00", string_data_real_off)
            if string_data_end != -1:
                self.srcDexStringList[i] = self.srcDexFileData[
                    string_data_real_off:string_data_end]

    def parseTypeList(self) -> None:
        """Fill ``srcDexTypeList`` with each type's string-table index."""
        self.type_ids_size = self._readU32(0x40)
        self.type_ids_off = self._readU32(0x44)
        self.srcDexTypeList = [
            self._readU32(self.type_ids_off + i * 4)
            for i in range(self.type_ids_size)]

    def parseProtoList(self) -> None:
        """Fill ``srcDexProtoList`` with (shorty_idx, return_type_idx, parameters_off)."""
        self.proto_ids_size = self._readU32(0x48)
        self.proto_ids_off = self._readU32(0x4C)
        self.srcDexProtoList = [
            struct.unpack_from(
                "<III", self.srcDexFileData, self.proto_ids_off + i * 0xC)
            for i in range(self.proto_ids_size)]

    def parseMethodList(self) -> None:
        """Fill ``srcDexMethodList`` with (class_idx, proto_idx, name_idx)."""
        self.method_ids_size = self._readU32(0x58)
        self.method_ids_off = self._readU32(0x5C)
        self.srcDexMethodList = [
            struct.unpack_from(
                "<HHI", self.srcDexFileData, self.method_ids_off + i * 8)
            for i in range(self.method_ids_size)]

    def _parseEncodedMethods(self, cursor: int, count: int):
        """Parse ``count`` encoded_method entries starting at ``cursor``.

        Returns (list of DexMethod selected for extraction, cursor after the
        last entry).
        """
        methods = []
        method_idx = 0
        for _ in range(count):
            # Each entry is 3 uleb128 values
            method_idx_diff, cursor = self._readUleb128(cursor)
            access_flags, cursor = self._readUleb128(cursor)
            code_off, cursor = self._readUleb128(cursor)
            # The first diff is the index itself; later ones are increments
            method_idx += method_idx_diff
            # Only methods with code whose flags are exactly ACC_PUBLIC (0x1),
            # ACC_PRIVATE (0x2) or ACC_PROTECTED (0x4) are extracted
            if code_off == 0 or access_flags not in (0x1, 0x2, 0x4):
                continue
            # Return type descriptor of the method
            proto_idx = self.srcDexMethodList[method_idx][1]
            return_type_idx = self.srcDexProtoList[proto_idx][1]
            code_return_type = self.srcDexStringList[
                self.srcDexTypeList[return_type_idx]]
            # Return stub for that type
            code_return_bytes = self.getCodeReturnBytes(code_return_type)
            # insns_size sits 12 bytes into the code item, the instructions 16
            code_insns_size = self._readU32(code_off + 12)
            # The stub plus the 4-byte sequence number must fit
            if len(code_return_bytes) + 4 > code_insns_size * 2:
                continue
            code_insns_off = code_off + 12 + 4
            dexMethod = DexMethod(method_idx, code_insns_off, code_insns_size)
            dexMethod.code_insns_data = self.srcDexFileData[
                code_insns_off:code_insns_off + code_insns_size * 2]
            dexMethod.code_return_insns_data = list(code_return_bytes)
            # Sequence number, stored directly as 4 bytes
            dexMethod.code_number = struct.pack("<I", self.code_number_global)
            self.code_number_global += 1
            methods.append(dexMethod)
        return methods, cursor

    def parseClassList(self) -> None:
        """Fill ``srcDexClassList`` with the classes named in ``classNameList``."""
        self.class_defs_size = self._readU32(0x60)
        self.class_defs_off = self._readU32(0x64)
        self.srcDexClassList = []
        for i in range(self.class_defs_size):
            class_def_off = self.class_defs_off + i * 0x20
            # Class name via type table -> string table
            class_idx = self._readU32(class_def_off)
            class_name = self.srcDexStringList[self.srcDexTypeList[class_idx]]
            if class_name not in self.classNameList:
                continue
            class_data_off = self._readU32(class_def_off + 0x18)
            if class_data_off == 0:
                # The class has no class data, hence no methods to extract
                dexClass = DexClass(class_name, 0, 0)
                dexClass.class_direct_methods_list = []
                dexClass.class_virtual_methods_list = []
                self.srcDexClassList.append(dexClass)
                continue
            # The class data starts with 4 uleb128 sizes
            cursor = class_data_off
            static_fields_size, cursor = self._readUleb128(cursor)
            instance_fields_size, cursor = self._readUleb128(cursor)
            direct_methods_size, cursor = self._readUleb128(cursor)
            virtual_methods_size, cursor = self._readUleb128(cursor)
            # The field entries (2 uleb128 each) come before the methods and
            # must be skipped, otherwise they are misread as method entries
            for _ in range((static_fields_size + instance_fields_size) * 2):
                _, cursor = self._readUleb128(cursor)
            dexClass = DexClass(
                class_name, direct_methods_size, virtual_methods_size)
            dexClass.class_direct_methods_list, cursor = self._parseEncodedMethods(
                cursor, direct_methods_size)
            dexClass.class_virtual_methods_list, cursor = self._parseEncodedMethods(
                cursor, virtual_methods_size)
            self.srcDexClassList.append(dexClass)

    def parseUleb128(self, uleb128_max: list):
        """Decode one uleb128 from the start of ``uleb128_max``.

        :param uleb128_max: candidate byte values (ints), at most 5 are used
        :return: (number of bytes consumed, decoded value)
        :raises IndexError: if the list ends before the value is complete
        """
        uleb128_used = 0
        uleb128_val = 0
        while True:
            value = uleb128_max[uleb128_used]
            # Each byte contributes its low 7 bits, least significant first
            uleb128_val |= (value & 0x7F) << (7 * uleb128_used)
            uleb128_used += 1
            # A clear high bit ends the value; it never exceeds 5 bytes
            if (value & 0x80) == 0 or uleb128_used == 5:
                break
        return uleb128_used, uleb128_val

    def getCodeReturnBytes(self, code_return_type: bytes) -> bytes:
        """Return the return-stub bytes for a return type descriptor."""
        if code_return_type == b"V":
            # return-void
            return bytes([0x0e, 0x0])
        if code_return_type in (b"B", b"C", b"F", b"I", b"S", b"Z"):
            # const/4 v0, 0 ; return v0
            return bytes([0x12, 0x00, 0x0f, 0x00])
        if code_return_type in (b"D", b"J"):
            # const-wide/16 v0, 0 ; return-wide v0
            return bytes([0x16, 0x00, 0x00, 0x00, 0x10, 0x00])
        # const/4 v0, 0 ; return-object v0
        return bytes([0x12, 0x00, 0x11, 0x00])

    def _iterExtractedMethods(self):
        """Yield every extracted DexMethod: per class, direct then virtual."""
        for dexClass in self.srcDexClassList:
            for dexMethod in dexClass.class_direct_methods_list or []:
                yield dexMethod
            for dexMethod in dexClass.class_virtual_methods_list or []:
                yield dexMethod

    @staticmethod
    def _fixHeader(dexData: bytearray) -> None:
        """Recompute signature and checksum of ``dexData`` in place."""
        # Signature: raw SHA-1 digest of everything after it (offset 32 on)
        dexData[12:32] = hashlib.sha1(bytes(dexData[32:])).digest()
        # Checksum: Adler-32 of everything after it (offset 12 on), little-endian
        dexData[8:12] = struct.pack("<I", zlib.adler32(bytes(dexData[12:])))

    def extract(self) -> bytes:
        """Return the extracted code.

        Per method: code_number (4 bytes) + code length in bytes (4 bytes) +
        the instruction bytes.
        """
        chunks = []
        for dexMethod in self._iterExtractedMethods():
            chunks.append(dexMethod.code_number)
            # Length is stored in bytes so the reader need not double it
            chunks.append(struct.pack("<I", dexMethod.code_insns_size * 2))
            chunks.append(dexMethod.code_insns_data)
        return b"".join(chunks)

    def patch(self) -> bytes:
        """Return the dex with extracted code replaced by stub + sequence number.

        The instruction area is zero-filled (nop), the return stub is written
        at its start followed by the sequence number, and signature and
        checksum are recomputed.
        """
        srcDexFileDataPatched = bytearray(self.srcDexFileData)
        for dexMethod in self._iterExtractedMethods():
            start = dexMethod.code_insns_off
            size = dexMethod.code_insns_size * 2
            srcDexFileDataPatched[start:start + size] = bytes(size)
            stub = bytes(dexMethod.code_return_insns_data) + dexMethod.code_number
            srcDexFileDataPatched[start:start + len(stub)] = stub
        self._fixHeader(srcDexFileDataPatched)
        return bytes(srcDexFileDataPatched)

    def extract_old(self) -> bytes:
        """Return the extracted code in the older layout.

        Per method: method_idx (4 bytes) + code_insns_off (4 bytes) +
        code_insns_size (4 bytes) + the instruction bytes.
        """
        chunks = []
        for dexMethod in self._iterExtractedMethods():
            chunks.append(struct.pack(
                "<III", dexMethod.method_idx, dexMethod.code_insns_off,
                dexMethod.code_insns_size))
            chunks.append(dexMethod.code_insns_data)
        return b"".join(chunks)

    def patch_old(self) -> bytes:
        """Return the dex with extracted code replaced by the return stub only.

        Same as ``patch()`` but without the sequence number.
        """
        srcDexFileDataPatched = bytearray(self.srcDexFileData)
        for dexMethod in self._iterExtractedMethods():
            start = dexMethod.code_insns_off
            size = dexMethod.code_insns_size * 2
            srcDexFileDataPatched[start:start + size] = bytes(size)
            stub = bytes(dexMethod.code_return_insns_data)
            srcDexFileDataPatched[start:start + len(stub)] = stub
        self._fixHeader(srcDexFileDataPatched)
        return bytes(srcDexFileDataPatched)
# with open(DEXPATH, "rb") as f:
# srcDexFileData = f.read()
# dexParser = DexParser(srcDexFileData)
# with open(DEXCODEPATH, "wb") as f:
# f.write(dexParser.extract())
# with open(DEXPATCHPATH, "wb") as f:
# f.write(dexParser.patch())
尾声
笔者用了一个全局方法序号去标识所抽取的方法代码,从而避免在so层对dex文件的重新解析(其实就是偷懒了)
这里猜测dpt-shell项目的作者前移hook点是为了适配多种安卓版本
笔者这里没有考虑版本兼容性的问题,只是在安卓13系统上的简单实践,欢迎师傅们讨论~