文章目录
- 源代码
- 汇编代码
- 汇编分析
- 从汇编代码深入分析字符串定义方式
-
- [1. 整体汇编结构分析](#1. 整体汇编结构分析)
- [2. 字符串常量指针](#2. 字符串常量指针)
- [3. 字符数组(逐个字符)](#3. 字符数组(逐个字符))
- [4. 字符串指针数组](#4. 字符串指针数组)
- [5. 二维字符数组(字符串字面量)](#5. 二维字符数组(字符串字面量))
- [6. 二维字符数组(逐个字符)](#6. 二维字符数组(逐个字符))
- [7. 逆向工程关键对比](#7. 逆向工程关键对比)
-
- [7.1 字符串可见性分析](#7.1 字符串可见性分析)
- [7.2 内存访问模式](#7.2 内存访问模式)
- [7.3 初始化开销](#7.3 初始化开销)
- [8. 总结:从逆向角度的洞察](#8. 总结:从逆向角度的洞察)
-
- [8.1 最容易逆向的定义](#8.1 最容易逆向的定义)
- [8.2 较难逆向的定义](#8.2 较难逆向的定义)
- [8.3 逆向分析技巧](#8.3 逆向分析技巧)
- [8.4 编译器优化影响](#8.4 编译器优化影响)
- 最终结论
源代码
cpp
#include <iostream>
using namespace std;
// Demo program: declares the same kind of text data in five different ways so
// that the code generated for each initialization can be compared in the
// disassembly. None of the variables is read afterwards; main just returns 0.
int main()
{
// Single string pointer - holds the address of a string literal.
const char* status_message = "Operation successful";
// Character array - a modifiable character sequence, initialized one character
// at a time with an explicit '\0' terminator (11 elements in total).
char command_buffer[] = { 'l','o','g','i','n','_','u','s','e','r','\0' };
// Array of string pointers - stores the addresses of four string literals.
const char* menu_options[] = {
"1. Start Game",
"2. Load Game",
"3. Settings",
"4. Exit"
};
// 2-D character array - modifiable strings initialized from string literals.
// Each row has 20 elements; those after the literal are zero-initialized.
char user_database[3][20] = {
"admin",
"guest_user",
"test_account"
};
// 2-D character array - modifiable strings initialized character by character.
// Each row again has 20 elements; the unlisted ones are zero-initialized.
char system_logs[3][20] = {
{'S','t','a','r','t','u','p',' ','O','K','\0'},
{'I','n','i','t',' ','D','e','v','i','c','e','s','\0'},
{'L','o','a','d',' ','C','o','n','f','i','g','\0'}
};
return 0;
}
汇编代码
cpp
int main()
{
00007FF73F3618F0 push rbp
00007FF73F3618F2 push rsi
00007FF73F3618F3 push rdi
00007FF73F3618F4 sub rsp,230h
00007FF73F3618FB lea rbp,[rsp+20h]
00007FF73F361900 lea rdi,[rsp+20h]
00007FF73F361905 mov ecx,54h
00007FF73F36190A mov eax,0CCCCCCCCh
00007FF73F36190F rep stos dword ptr [rdi]
00007FF73F361911 mov rax,qword ptr [__security_cookie (07FF73F36F000h)]
00007FF73F361918 xor rax,rbp
00007FF73F36191B mov qword ptr [rbp+208h],rax
00007FF73F361922 lea rcx,[__D4D72169_test_cpp2@cpp (07FF73F375076h)]
00007FF73F361929 call __CheckForDebuggerJustMyCode (07FF73F3613F7h)
00007FF73F36192E nop
// 单个字符串指针 - 指向一个字符串常量
const char* status_message = "Operation successful";
00007FF73F36192F lea rax,[string "Operation successful" (07FF73F36BDC8h)]
00007FF73F361936 mov qword ptr [status_message],rax
// 字符数组 - 存储可修改的字符序列
char command_buffer[] = { 'l','o','g','i','n','_','u','s','e','r','\0' };
00007FF73F36193A mov byte ptr [command_buffer],6Ch
00007FF73F36193E mov byte ptr [rbp+29h],6Fh
00007FF73F361942 mov byte ptr [rbp+2Ah],67h
00007FF73F361946 mov byte ptr [rbp+2Bh],69h
00007FF73F36194A mov byte ptr [rbp+2Ch],6Eh
00007FF73F36194E mov byte ptr [rbp+2Dh],5Fh
00007FF73F361952 mov byte ptr [rbp+2Eh],75h
00007FF73F361956 mov byte ptr [rbp+2Fh],73h
00007FF73F36195A mov byte ptr [rbp+30h],65h
00007FF73F36195E mov byte ptr [rbp+31h],72h
00007FF73F361962 mov byte ptr [rbp+32h],0
// 字符串指针数组 - 存储多个字符串常量的地址
const char* menu_options[] = {
"1. Start Game",
00007FF73F361966 lea rax,[string "1. Start Game" (07FF73F36BDE8h)]
00007FF73F36196D mov qword ptr [menu_options],rax
"2. Load Game",
00007FF73F361971 lea rax,[string "2. Load Game" (07FF73F36BDF8h)]
00007FF73F361978 mov qword ptr [rbp+60h],rax
"3. Settings",
00007FF73F36197C lea rax,[string "3. Settings" (07FF73F36BE08h)]
00007FF73F361983 mov qword ptr [rbp+68h],rax
"4. Exit"
00007FF73F361987 lea rax,[string "4. Exit" (07FF73F36BE18h)]
00007FF73F36198E mov qword ptr [rbp+70h],rax
};
// 二维字符数组 - 存储可修改的字符串(使用字符串字面量初始化)
char user_database[3][20] = {
00007FF73F361992 lea rax,[user_database]
00007FF73F361999 lea rcx,[string "admin" (07FF73F36BE24h)]
00007FF73F3619A0 mov rdi,rax
00007FF73F3619A3 mov rsi,rcx
00007FF73F3619A6 mov ecx,6
00007FF73F3619AB rep movs byte ptr [rdi],byte ptr [rsi]
00007FF73F3619AD lea rax,[rbp+9Eh]
00007FF73F3619B4 mov rdi,rax
00007FF73F3619B7 xor eax,eax
00007FF73F3619B9 mov ecx,0Eh
00007FF73F3619BE rep stos byte ptr [rdi]
00007FF73F3619C0 lea rax,[rbp+0ACh]
00007FF73F3619C7 lea rcx,[string "guest_user" (07FF73F36BE30h)]
00007FF73F3619CE mov rdi,rax
00007FF73F3619D1 mov rsi,rcx
00007FF73F3619D4 mov ecx,0Bh
00007FF73F3619D9 rep movs byte ptr [rdi],byte ptr [rsi]
00007FF73F3619DB lea rax,[rbp+0B7h]
00007FF73F3619E2 mov rdi,rax
00007FF73F3619E5 xor eax,eax
00007FF73F3619E7 mov ecx,9
00007FF73F3619EC rep stos byte ptr [rdi]
00007FF73F3619EE lea rax,[rbp+0C0h]
00007FF73F3619F5 lea rcx,[string "test_account" (07FF73F36BE40h)]
00007FF73F3619FC mov rdi,rax
00007FF73F3619FF mov rsi,rcx
00007FF73F361A02 mov ecx,0Dh
00007FF73F361A07 rep movs byte ptr [rdi],byte ptr [rsi]
00007FF73F361A09 lea rax,[rbp+0CDh]
00007FF73F361A10 mov rdi,rax
00007FF73F361A13 xor eax,eax
00007FF73F361A15 mov ecx,7
00007FF73F361A1A rep stos byte ptr [rdi]
"admin",
"guest_user",
"test_account"
};
// 二维字符数组 - 存储可修改的字符串(使用字符初始化)
char system_logs[3][20] = {
{'S','t','a','r','t','u','p',' ','O','K','\0'},
00007FF73F361A1C mov byte ptr [system_logs],53h
00007FF73F361A23 mov byte ptr [rbp+0F9h],74h
00007FF73F361A2A mov byte ptr [rbp+0FAh],61h
00007FF73F361A31 mov byte ptr [rbp+0FBh],72h
00007FF73F361A38 mov byte ptr [rbp+0FCh],74h
00007FF73F361A3F mov byte ptr [rbp+0FDh],75h
00007FF73F361A46 mov byte ptr [rbp+0FEh],70h
00007FF73F361A4D mov byte ptr [rbp+0FFh],20h
00007FF73F361A54 mov byte ptr [rbp+100h],4Fh
00007FF73F361A5B mov byte ptr [rbp+101h],4Bh
00007FF73F361A62 mov byte ptr [rbp+102h],0
00007FF73F361A69 lea rax,[rbp+103h]
00007FF73F361A70 mov rdi,rax
00007FF73F361A73 xor eax,eax
00007FF73F361A75 mov ecx,9
00007FF73F361A7A rep stos byte ptr [rdi]
{'I','n','i','t',' ','D','e','v','i','c','e','s','\0'},
00007FF73F361A7C mov byte ptr [rbp+10Ch],49h
00007FF73F361A83 mov byte ptr [rbp+10Dh],6Eh
00007FF73F361A8A mov byte ptr [rbp+10Eh],69h
00007FF73F361A91 mov byte ptr [rbp+10Fh],74h
00007FF73F361A98 mov byte ptr [rbp+110h],20h
00007FF73F361A9F mov byte ptr [rbp+111h],44h
00007FF73F361AA6 mov byte ptr [rbp+112h],65h
00007FF73F361AAD mov byte ptr [rbp+113h],76h
00007FF73F361AB4 mov byte ptr [rbp+114h],69h
00007FF73F361ABB mov byte ptr [rbp+115h],63h
00007FF73F361AC2 mov byte ptr [rbp+116h],65h
00007FF73F361AC9 mov byte ptr [rbp+117h],73h
00007FF73F361AD0 mov byte ptr [rbp+118h],0
00007FF73F361AD7 lea rax,[rbp+119h]
00007FF73F361ADE mov rdi,rax
00007FF73F361AE1 xor eax,eax
00007FF73F361AE3 mov ecx,7
00007FF73F361AE8 rep stos byte ptr [rdi]
{'L','o','a','d',' ','C','o','n','f','i','g','\0'}
00007FF73F361AEA mov byte ptr [rbp+120h],4Ch
00007FF73F361AF1 mov byte ptr [rbp+121h],6Fh
00007FF73F361AF8 mov byte ptr [rbp+122h],61h
00007FF73F361AFF mov byte ptr [rbp+123h],64h
00007FF73F361B06 mov byte ptr [rbp+124h],20h
00007FF73F361B0D mov byte ptr [rbp+125h],43h
00007FF73F361B14 mov byte ptr [rbp+126h],6Fh
00007FF73F361B1B mov byte ptr [rbp+127h],6Eh
00007FF73F361B22 mov byte ptr [rbp+128h],66h
00007FF73F361B29 mov byte ptr [rbp+129h],69h
00007FF73F361B30 mov byte ptr [rbp+12Ah],67h
00007FF73F361B37 mov byte ptr [rbp+12Bh],0
00007FF73F361B3E lea rax,[rbp+12Ch]
00007FF73F361B45 mov rdi,rax
00007FF73F361B48 xor eax,eax
00007FF73F361B4A mov ecx,8
00007FF73F361B4F rep stos byte ptr [rdi]
};
return 0;
00007FF73F361B51 xor eax,eax
}
汇编分析
从汇编代码深入分析字符串定义方式
非常好!通过实际的汇编代码来分析,我们能最直观地看到不同字符串定义方式的本质区别。让我逐段分析你提供的汇编代码。
1. 整体汇编结构分析
首先看到编译器生成的标准调试代码:
assembly
00007FF73F3618F0 push rbp
00007FF73F3618F2 push rsi
00007FF73F3618F3 push rdi
00007FF73F3618F4 sub rsp,230h ; 分配栈空间 560字节
00007FF73F3618FB lea rbp,[rsp+20h] ; 建立栈帧
00007FF73F361900 lea rdi,[rsp+20h]
00007FF73F361905 mov ecx,54h ; 填充84个dword
00007FF73F36190A mov eax,0CCCCCCCCh ; 调试模式填充0xCC
00007FF73F36190F rep stos dword ptr [rdi] ; 初始化栈空间为0xCC
2. 字符串常量指针
assembly
; const char* status_message = "Operation successful";
00007FF73F36192F lea rax,[string "Operation successful" (07FF73F36BDC8h)]
00007FF73F361936 mov qword ptr [status_message],rax
分析:
- 地址 07FF73F36BDC8h:位于.rdata段(只读数据段)
- 指针变量:在栈上占用8字节
- 操作:只是将常量地址存入栈变量
内存布局:
.rdata段:
0x7FF73F36BDC8: "Operation successful\0"
栈上:
[rbp+?] = 0x7FF73F36BDC8 (指向只读区)
逆向特征:
✅ 字符串在.rdata段完整可见
✅ 通过strings命令直接提取
✅ 运行时通过指针间接访问(先取指针,再取字符)
3. 字符数组(逐个字符)
assembly
; char command_buffer[] = {'l','o','g','i','n','_','u','s','e','r','\0'};
00007FF73F36193A mov byte ptr [command_buffer],6Ch ; 'l'
00007FF73F36193E mov byte ptr [rbp+29h],6Fh ; 'o'
00007FF73F361942 mov byte ptr [rbp+2Ah],67h ; 'g'
00007FF73F361946 mov byte ptr [rbp+2Bh],69h ; 'i'
00007FF73F36194A mov byte ptr [rbp+2Ch],6Eh ; 'n'
00007FF73F36194E mov byte ptr [rbp+2Dh],5Fh ; '_'
00007FF73F361952 mov byte ptr [rbp+2Eh],75h ; 'u'
00007FF73F361956 mov byte ptr [rbp+2Fh],73h ; 's'
00007FF73F36195A mov byte ptr [rbp+30h],65h ; 'e'
00007FF73F36195E mov byte ptr [rbp+31h],72h ; 'r'
00007FF73F361962 mov byte ptr [rbp+32h],0 ; '\0'
分析:
- 存储位置:完全在栈上([rbp+?]地址)
- 初始化方式:立即数逐个写入
- 偏移计算:command_buffer基址 + 偏移量
内存布局:
栈上连续内存:
[rbp+28h]: 'l' (6Ch)
[rbp+29h]: 'o' (6Fh)
[rbp+2Ah]: 'g' (67h)
[rbp+2Bh]: 'i' (69h)
[rbp+2Ch]: 'n' (6Eh)
[rbp+2Dh]: '_' (5Fh)
[rbp+2Eh]: 'u' (75h)
[rbp+2Fh]: 's' (73h)
[rbp+30h]: 'e' (65h)
[rbp+31h]: 'r' (72h)
[rbp+32h]: 0
逆向特征:
⚠️ 字符串数据在代码段 (.text)作为立即数
⚠️ 不会出现在.rdata段
⚠️ 但连续的值容易被识别为ASCII
4. 字符串指针数组
assembly
; const char* menu_options[] = {
; "1. Start Game",
00007FF73F361966 lea rax,[string "1. Start Game" (07FF73F36BDE8h)]
00007FF73F36196D mov qword ptr [menu_options],rax
; "2. Load Game",
00007FF73F361971 lea rax,[string "2. Load Game" (07FF73F36BDF8h)]
00007FF73F361978 mov qword ptr [rbp+60h],rax
; "3. Settings",
00007FF73F36197C lea rax,[string "3. Settings" (07FF73F36BE08h)]
00007FF73F361983 mov qword ptr [rbp+68h],rax
; "4. Exit"
00007FF73F361987 lea rax,[string "4. Exit" (07FF73F36BE18h)]
00007FF73F36198E mov qword ptr [rbp+70h],rax
分析:
- 字符串:都在.rdata段(只读)
- 指针数组:在栈上连续存储4个指针(4×8=32字节)
- 连续地址:menu_options,[rbp+60h],[rbp+68h],[rbp+70h]
内存布局:
.rdata段:
0x7FF73F36BDE8: "1. Start Game\0"
0x7FF73F36BDF8: "2. Load Game\0"
0x7FF73F36BE08: "3. Settings\0"
0x7FF73F36BE18: "4. Exit\0"
栈上指针数组:
menu_options = 0x7FF73F36BDE8
[rbp+60h] = 0x7FF73F36BDF8
[rbp+68h] = 0x7FF73F36BE08
[rbp+70h] = 0x7FF73F36BE18
5. 二维字符数组(字符串字面量)
assembly
; char user_database[3][20] = { "admin", "guest_user", "test_account" };
; 第一行: "admin"
00007FF73F361992 lea rax,[user_database]
00007FF73F361999 lea rcx,[string "admin" (07FF73F36BE24h)]
00007FF73F3619A0 mov rdi,rax
00007FF73F3619A3 mov rsi,rcx
00007FF73F3619A6 mov ecx,6 ; 复制6字节(包括\0)
00007FF73F3619AB rep movs byte ptr [rdi],byte ptr [rsi]
; 填充剩余14字节为0
00007FF73F3619AD lea rax,[rbp+9Eh]
00007FF73F3619B4 mov rdi,rax
00007FF73F3619B7 xor eax,eax
00007FF73F3619B9 mov ecx,0Eh ; 填充14字节0
00007FF73F3619BE rep stos byte ptr [rdi]
; 第二行: "guest_user" (类似模式)
00007FF73F3619C0 lea rax,[rbp+0ACh]
00007FF73F3619C7 lea rcx,[string "guest_user" (07FF73F36BE30h)]
; ... 复制11字节,填充9字节0
; 第三行: "test_account" (类似模式)
00007FF73F3619EE lea rax,[rbp+0C0h]
00007FF73F3619F5 lea rcx,[string "test_account" (07FF73F36BE40h)]
; ... 复制13字节,填充7字节0
分析:
- 源字符串:在.rdata段(临时使用)
- 目标数组:在栈上连续3×20=60字节
- 初始化过程:从只读区复制到栈上
- 填充剩余空间:用0填充未使用的字节
关键观察:
assembly
; 地址计算显示连续存储:
第一行: [user_database] (基址)
第二行: [rbp+0ACh] = 基址 + 20
第三行: [rbp+0C0h] = 基址 + 40
6. 二维字符数组(逐个字符)
assembly
; char system_logs[3][20] = {
; {'S','t','a','r','t','u','p',' ','O','K','\0'},
; 逐个写入第一行
00007FF73F361A1C mov byte ptr [system_logs],53h ; 'S'
00007FF73F361A23 mov byte ptr [rbp+0F9h],74h ; 't'
00007FF73F361A2A mov byte ptr [rbp+0FAh],61h ; 'a'
00007FF73F361A31 mov byte ptr [rbp+0FBh],72h ; 'r'
00007FF73F361A38 mov byte ptr [rbp+0FCh],74h ; 't'
00007FF73F361A3F mov byte ptr [rbp+0FDh],75h ; 'u'
00007FF73F361A46 mov byte ptr [rbp+0FEh],70h ; 'p'
00007FF73F361A4D mov byte ptr [rbp+0FFh],20h ; ' '
00007FF73F361A54 mov byte ptr [rbp+100h],4Fh ; 'O'
00007FF73F361A5B mov byte ptr [rbp+101h],4Bh ; 'K'
00007FF73F361A62 mov byte ptr [rbp+102h],0 ; '\0'
; 填充剩余9字节为0
00007FF73F361A69 lea rax,[rbp+103h]
00007FF73F361A70 mov rdi,rax
00007FF73F361A73 xor eax,eax
00007FF73F361A75 mov ecx,9
00007FF73F361A7A rep stos byte ptr [rdi]
; 第二行: "Init Devices"
00007FF73F361A7C mov byte ptr [rbp+10Ch],49h ; 'I'
00007FF73F361A83 mov byte ptr [rbp+10Dh],6Eh ; 'n'
; ... 其余字符依次写入(整行共12个字符加'\0'),再填充7字节0
; 第三行: "Load Config"
00007FF73F361AEA mov byte ptr [rbp+120h],4Ch ; 'L'
00007FF73F361AF1 mov byte ptr [rbp+121h],6Fh ; 'o'
; ... 其余字符依次写入(整行共11个字符加'\0'),再填充8字节0
分析:
- 存储位置:完全在栈上
- 初始化:立即数直接写入,没有从.rdata复制的过程
- 字符分散:每个字符作为独立立即数出现在代码中
7. 逆向工程关键对比
7.1 字符串可见性分析
| 定义方式 | .rdata段 | 代码段(.text) | 栈初始化 | strings命令可见 |
|---|---|---|---|---|
| const char* | ✅ 完整字符串 | ❌ | 只存指针 | ✅ 完全可见 |
| 字符数组(字面量) | ✅ 临时存在 | ❌ | 复制字符串 | ✅ 可见 |
| 字符数组(逐个) | ❌ | ✅ 分散立即数 | 直接写入 | ⚠️ 不易见 |
| 指针数组 | ✅ 多个字符串 | ❌ | 存指针数组 | ✅ 完全可见 |
| 二维数组(字面量) | ✅ 临时存在 | ❌ | 复制多行 | ✅ 可见 |
| 二维数组(逐个) | ❌ | ✅ 分散立即数 | 直接写入 | ⚠️ 隐藏较好 |
7.2 内存访问模式
assembly
; const char* - 间接访问
mov rax, [status_message] ; 取指针
mov cl, [rax] ; 间接取字符
; char[] - 直接访问
mov cl, [rbp+28h] ; 直接取字符
; 指针数组 - 数组本身在栈上,元素就是字符串指针,同样是一次间接
mov rax, [menu_options] ; 取menu_options[0](字符串指针)
mov cl, [rax] ; 间接取字符
7.3 初始化开销
assembly
; const char* - 开销最小
lea rax, [string_addr]
mov [rbp+?], rax ; 2条指令
; 字符数组(逐个) - 开销最大
mov byte ptr [addr1], 'l' ; 11条指令
mov byte ptr [addr2], 'o' ; 每条一个字符
; ... 共11条指令
; 二维数组(字面量) - 中等
rep movs ; 批量复制,效率高
rep stos ; 批量填充0
8. 总结:从逆向角度的洞察
8.1 最容易逆向的定义
cpp
const char* status_message = "Operation successful";
const char* menu_options[] = {...};
✅ 所有字符串集中在.rdata段
✅ strings命令一网打尽
✅ 逆向工程师的最爱 😊
8.2 较难逆向的定义
cpp
char system_logs[3][20] = {
{'S','t','a','r','t','u','p',' ','O','K','\0'},
// ...
};
⚠️ 字符串数据分散在代码段
⚠️ 需要跟踪大量立即数
⚠️ 增加逆向难度
8.3 逆向分析技巧
从你的汇编代码中,逆向工程师会:
- 首先查看.rdata段:立即发现status_message和menu_options指向的字符串
- 扫描代码段:寻找连续的立即数赋值模式
- 识别rep movs:发现从.rdata到栈的复制操作
- 跟踪栈偏移:重建二维数组的内存布局
8.4 编译器优化影响
注意在Debug模式(你的代码):
- 大量mov byte ptr指令
- 没有优化合并
- 清晰的边界
在Release模式下,编译器可能会:
- 合并连续的字符赋值
- 使用更高效的复制指令
- 甚至完全优化掉未使用的字符串
最终结论
通过这份汇编代码,我们可以清晰地看到:
- 字符串常量和字面量通常在.rdata段可见(未被优化掉时)
- 逐个字符初始化将字符串数据隐藏在代码段
- 指针数组只存储指针,字符串仍在.rdata段
- 二维数组会在栈上开辟连续空间
- 初始化的方式决定了字符串在二进制文件中的可见性
这就是为什么在需要隐藏字符串时,开发者会选择逐个字符初始化或运行时解密的方式。