Cython二进制逆向系列(三)运算符
在开始前,先给出本文用到的py源代码
python
def test1(x, y):
# 数学运算符
a = x + y
b = x - y
c = x * y
d = x / y
e = x // y
f = x % y
g = x ** y
# 位运算符
h = x & y
i = x | y
j = x ^ y
k = ~x
l = x >> 4
m = x << 2
print(a, b, c, d, e, f, g, h, i, j, k, l, m)
def test2(x, y):
# in/not in 运算符
if x in y:
x = y
elif x not in y:
y = x
print(x, y)
def test3(x, y):
# ==运算符与逻辑运算符
print(x == 0 and y == 0)
print(y == 0 or x == 0)
print(not x==0)
if __name__ == '__main__':
test1(1, 2)
test2(1, 2)
test3(1, 2)
在这篇文章里,我们会讨论Cython是如何处理运算符的(数学运算符、位运算符、in/not in 运算符、==运算符与逻辑运算符)。总的来说,其中大部分是调用虚拟机api来实现的。
数学运算符与位运算符

可以看得出来全是调用虚拟机的api
下面给出运算符与api的对应表(其实看名字大概都能猜出来):
符号 | 含义 | 函数名 |
---|---|---|
+ | 加 | PyNumber_Add |
- | 减 | PyNumber_Subtract |
* | 乘 | PyNumber_Multiply |
/ | 除 | __Pyx_PyNumber_Divide(Cython 的宏,Python 3 下展开为 PyNumber_TrueDivide,二进制中看到的是后者) |
// | 整除 | PyNumber_FloorDivide |
% | 取模 | PyNumber_Remainder |
** | 乘方 | PyNumber_Power |
& | 按位与 | PyNumber_And |
\| | 按位或 | PyNumber_Or |
^ | 按位异或 | PyNumber_Xor |
~ | 按位取非 | PyNumber_Invert |
>> | 右移 | PyNumber_Rshift |
<< | 左移 | PyNumber_Lshift |
这里单独看一下位移在ida中的体现
c
v24 = off_1800095B8[32];
if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] )
{
v27 = PyNumber_Rshift(v4, off_1800095B8[32]);
LABEL_35:
v4 = v27;
goto LABEL_36;
}
v25 = *(_QWORD *)(v4 + 16);
if ( v25 )
{
if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
{
v26 = v25 + 4;
switch ( v26 )
{
case 2i64:
v27 = PyLong_FromLongLong(
-(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
v26,
v24,
0x180000000ui64);
break;
case 6i64:
v27 = PyLong_FromLongLong(
(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
v26,
v24,
0x180000000ui64);
break;
default:
v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);
break;
}
}
else
{
v28 = -*(_DWORD *)(v4 + 24);
if ( v25 >= 0 )
v28 = *(_DWORD *)(v4 + 24);
v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
}
goto LABEL_35;
}
++*(_QWORD *)v4;
LABEL_36:
if ( !v4 )
{
v12 = 2534i64;
v13 = 13i64;
goto LABEL_58;
}
v10 = (_QWORD *)v4;
off_1800095B8[32]
中储存的就是4。这里Cython为了安全,对整数的处理还做了额外的检查(先判断对象类型是否为int,再按整数的位数分情况计算),我们可以看到在else后面PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
这里也可以看到是右移多少。
问题是,这里除了对象不是int时的回退分支之外,主路径上好像没看到表格中的PyNumber_Rshift
?因为py源代码中位移的位数是立即数,因此直接转换为c语言的位移运算符就好了。但是如果是x>>y
这样的两个都是变量,就会调用api PyNumber_Rshift

in/not in 运算符
c
/* "test.py":21
* def test2(x, y):
* # in/not in
* if x in y: # <<<<<<<<<<<<<<
* x = y
* elif x not in y:
*/
__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 21, __pyx_L1_error)
。。。。。。
/* "test.py":23
* if x in y:
* x = y
* elif x not in y: # <<<<<<<<<<<<<<
* y = x
* print(x, y)
*/
__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 23, __pyx_L1_error)
这里涉及到一些条件语句的转换,不过没关系,照样能看懂
在c代码中可以看到无论是in还是 not in 调用的都是函数__Pyx_PySequence_ContainsTF
。其前两个参数是前后两个参与运算的变量,而第三个参数Py_EQ
/Py_NE
则决定当前运算到底是in还是 not in

不幸的是,无论是in还是not in ,在ida中都是PySequence_Contains
,具体是哪个要结合上下文分析。比如这里v5 = PySequence_Contains(a3)
判断的是 a3
中是否包含 a2
。如果 v5 == 1
,表示 a2
在 a3
中,则进入接下来的操作(++*v3
和调整 v4
和 v3
的指向)。
而下面那个v9 = PySequence_Contains(v3)
判断的是 v3
中是否包含 v4
(即 v4 not in v3
)。这里,如果 v9 == 0
,表示 v4
不在 v3
中,符合 not in
的语义。因为当 v9 == 0
时表示 v4
不在 v3
中。
说人话就是看后续是对PySequence_Contains
的返回值和谁比较(1或者0)。
==运算符与逻辑运算符
逻辑与运算符的处理
c
/* "test.py":30
* def test3(x, y):
* # ==
* print(x == 0 and y == 0) # <<<<<<<<<<<<<<
* print(y == 0 or x == 0)
* print(not x==0)
*/
__pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 30, __pyx_L1_error)
if (__pyx_t_3) {
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
} else {
__Pyx_INCREF(__pyx_t_2);
__pyx_t_1 = __pyx_t_2;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
goto __pyx_L3_bool_binop_done;
}
__pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_INCREF(__pyx_t_2);
__pyx_t_1 = __pyx_t_2;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_L3_bool_binop_done:;
__Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0)
: 这行代码将 x == 0
的比较操作转换为 C 语言函数。它检查 x
是否等于 0
。(猜测不同类型的==有对应的函数,暂未验证)。

ida中比较==0的部分,看得出来它把变量分为int float 和其他三种情况,除了整数和浮点,一概用PyObject_RichCompare
比较。
在 C 代码中,and
逻辑运算符的处理通常是短路的。即,如果第一个条件为 False
,那么第二个条件不会被计算。在这里也一样:编译后的代码并不总是执行 y == 0
的检查,而是只有在 x == 0
为 True
时才会检查 y == 0
。
然后__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0)
检查 y == 0
,并根据结果将 __pyx_t_2
设置为布尔值。

ida中对and的处理也大致类似。看着有点恶心,全是if else条件分支和各种goto。
逻辑或运算符的处理
c
/* "test.py":31
* # ==
* print(x == 0 and y == 0)
* print(y == 0 or x == 0) # <<<<<<<<<<<<<<
* print(not x==0)
*
*/
__pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 31, __pyx_L1_error)
if (!__pyx_t_3) {
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
} else {
__Pyx_INCREF(__pyx_t_1);
__pyx_t_2 = __pyx_t_1;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L5_bool_binop_done;
}
__pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__Pyx_INCREF(__pyx_t_1);
__pyx_t_2 = __pyx_t_1;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_L5_bool_binop_done:;
前面都是在处理== :__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0)
: 检查 y == 0
,即比较 y
是否等于 0
。__Pyx_PyObject_IsTrue(__pyx_t_1)
: 将 __pyx_t_1
转换为布尔值。如果 y == 0
(即 __pyx_t_3
为 True
),就直接跳到 __pyx_L5_bool_binop_done
,并将 __pyx_t_1
(存储 y == 0
结果)传递给下一个操作。
在执行 or
运算时,短路操作符同样会起作用:如果 y == 0
为 True
,则 x == 0
的比较不会被执行,结果会直接为 True
。__pyx_t_2
保存了 y == 0
或 x == 0
的结果,它将作为最终的结果传递给 print
函数。
逻辑非运算符的处理
c
/* "test.py":32
* print(x == 0 and y == 0)
* print(y == 0 or x == 0)
* print(not x==0) # <<<<<<<<<<<<<<
*
*
*/
__pyx_t_3 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 32, __pyx_L1_error)
__pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_3)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_print, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
!__pyx_t_3
:这行代码计算 not x == 0
。由于 __pyx_t_3
是 x == 0
的布尔值,!__pyx_t_3
就是其逻辑取反。__Pyx_PyBool_FromLong((!__pyx_t_3))
将 !__pyx_t_3
转换为 Python 的布尔对象。如果 !__pyx_t_3
为 0
,则返回 False
;如果 !__pyx_t_3
为 1
,则返回 True
。

如果以后逆向在这里出题,考察逻辑运算符,那就认命吧,这里反编译出的代码很绕。
下面粘上test3函数的反编译代码。
c
// write access to const memory has been detected, the output may be wrong!
__int64 __fastcall sub_180001E30(__int64 a1, __int64 a2, __int64 a3)
{
v5 = *((_QWORD *)off_18000B688 + 35);
if ( a2 == v5 )
goto LABEL_2;
v7 = *(_QWORD *)(a2 + 8);
if ( v7 == PyLong_Type )
{
if ( *(_QWORD *)(a2 + 16) )
{
LABEL_5:
v6 = (_QWORD *)++Py_FalseStruct;
goto LABEL_10;
}
LABEL_2:
v6 = (_QWORD *)++Py_TrueStruct;
goto LABEL_10;
}
if ( v7 == PyFloat_Type )
{
if ( *(double *)(a2 + 16) != 0.0 )
goto LABEL_5;
goto LABEL_2;
}
v6 = (_QWORD *)PyObject_RichCompare(a2, v5, 2LL);
LABEL_10:
if ( !v6 )
{
v8 = 30;
v9 = 3136;
LABEL_75:
sub_180005F50("test.test3", v9, v8, (__int64)"test.py");
return 0LL;
}
IsTrue = v6 == (_QWORD *)Py_TrueStruct;
v11 = v6 == (_QWORD *)Py_NoneStruct;
v12 = IsTrue | v11 | (unsigned int)(v6 == (_QWORD *)Py_FalseStruct);
if ( !(IsTrue | (v11 || v6 == (_QWORD *)Py_FalseStruct)) )
IsTrue = PyObject_IsTrue(v6);
if ( IsTrue < 0 )
{
v8 = 30;
v9 = 3138;
goto LABEL_73;
}
v13 = *v6;
if ( !IsTrue )
{
*v6 = v13;
v16 = v6;
if ( v13 )
goto LABEL_26;
v18 = v6;
goto LABEL_25;
}
v14 = v13 - 1;
*v6 = v14;
if ( !v14 )
Py_Dealloc(v6);
v15 = (_QWORD *)sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));
v16 = v15;
if ( !v15 )
{
v8 = 30;
v9 = 3147;
goto LABEL_75;
}
v17 = *v15;
*v16 = v17;
if ( !v17 )
{
v18 = v16;
LABEL_25:
Py_Dealloc(v18);
}
LABEL_26:
v6 = v16;
v19 = (_QWORD *)sub_1800048D0(v12, v16);
if ( !v19 )
{
v8 = 30;
v9 = 3153;
if ( !v6 )
goto LABEL_75;
LABEL_73:
v20 = (*v6)-- == 1LL;
if ( v20 )
Py_Dealloc(v6);
goto LABEL_75;
}
v20 = (*v16)-- == 1LL;
if ( v20 )
Py_Dealloc(v16);
v20 = (*v19)-- == 1LL;
if ( v20 )
Py_Dealloc(v19);
v21 = sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));
v6 = (_QWORD *)v21;
if ( !v21 )
{
v8 = 31;
v9 = 3165;
goto LABEL_75;
}
v22 = sub_180006570(v21);
v23 = (unsigned int)v22;
if ( v22 < 0 )
{
v8 = 31;
v9 = 3167;
goto LABEL_73;
}
v24 = *v6;
if ( !(_DWORD)v23 )
{
v25 = v24 - 1;
*v6 = v25;
if ( !v25 )
Py_Dealloc(v6);
v26 = (_QWORD *)sub_180004780(a2, *((_QWORD *)off_18000B688 + 35));
v6 = v26;
if ( !v26 )
{
v8 = 31;
v9 = 3176;
goto LABEL_75;
}
v24 = *v26;
}
*v6 = v24;
if ( !v24 )
Py_Dealloc(v6);
v28 = (_QWORD *)sub_1800048D0(v23, v6);
if ( !v28 )
{
v8 = 31;
v9 = 3182;
if ( !v6 )
goto LABEL_75;
goto LABEL_73;
}
v20 = (*v6)-- == 1LL;
if ( v20 )
Py_Dealloc(v6);
v20 = (*v28)-- == 1LL;
if ( v20 )
Py_Dealloc(v28);
v29 = *((_QWORD *)off_18000B688 + 35);
if ( a2 == v29 )
goto LABEL_68;
v30 = *(_QWORD *)(a2 + 8);
if ( v30 == PyLong_Type )
{
v31 = *(_QWORD *)(a2 + 16) == 0LL;
}
else if ( v30 == PyFloat_Type )
{
if ( *(double *)(a2 + 16) == 0.0 )
goto LABEL_68;
v31 = 0;
}
else
{
v32 = PyObject_RichCompare(a2, v29, 2LL);
v33 = (_QWORD *)v32;
if ( v32 )
{
v31 = v32 == Py_TrueStruct;
v34 = v32 == Py_NoneStruct;
v27 = v31 | v34 | (unsigned int)(v33 == (_QWORD *)Py_FalseStruct);
if ( !(v31 | (v34 || v33 == (_QWORD *)Py_FalseStruct)) )
v31 = PyObject_IsTrue(v33);
v20 = (*v33)-- == 1LL;
if ( v20 )
Py_Dealloc(v33);
}
else
{
v31 = -1;
}
}
if ( v31 < 0 )
{
v8 = 32;
v9 = 3194;
goto LABEL_75;
}
if ( !v31 )
{
v6 = (_QWORD *)++Py_TrueStruct;
goto LABEL_69;
}
LABEL_68:
v6 = (_QWORD *)++Py_FalseStruct;
LABEL_69:
if ( !v6 )
{
v8 = 32;
v9 = 3195;
goto LABEL_75;
}
v35 = (_QWORD *)sub_1800048D0(v27, v6);
if ( !v35 )
{
v8 = 32;
v9 = 3197;
goto LABEL_73;
}
v20 = (*v6)-- == 1LL;
if ( v20 )
Py_Dealloc(v6);
v20 = (*v35)-- == 1LL;
if ( v20 )
Py_Dealloc(v35);
return Py_NoneStruct++;
}