Cython二进制逆向系列（三）运算符

在开始前，先给出本文用到的py源代码

python 复制代码

def test1(x, y):
    #   数学运算符
    a = x + y
    b = x - y
    c = x * y
    d = x / y
    e = x // y
    f = x % y
    g = x ** y
    #   位运算符
    h = x & y
    i = x | y
    j = x ^ y
    k = ~x
    l = x >> 4
    m = x << 2
    print(a, b, c, d, e, f, g, h, i, j, k, l, m)

def test2(x, y):
    #   in / not in operators: y must support membership tests
    if x in y:
        x = y  # x is contained in y: rebind x to y
    elif x not in y:
        y = x  # x is not contained in y: rebind y to x
    print(x, y)


def test3(x, y):
    #  == operator combined with the logical operators and / or / not
    print(x == 0 and y == 0)  # y == 0 is only evaluated when x == 0 is truthy
    print(y == 0 or x == 0)  # x == 0 is only evaluated when y == 0 is falsy
    print(not x==0)  # prints True when x does not compare equal to 0


if __name__ == '__main__':
    test1(1, 2)
    # test2 evaluates `x in y`, so y has to be a container. The previous call
    # test2(1, 2) raised "TypeError: argument of type 'int' is not iterable"
    # and test3 was never reached.
    test2(1, [1, 2])
    test3(1, 2)

在这篇文章里，我们会讨论Cython是如何处理运算符的（数学运算符、位运算符、in/not in 运算符、==运算符与逻辑运算符）。总的来说，其中大部分是调用虚拟机api来实现的。

数学运算符与位运算符

可以看得出来全是调用虚拟机的api

下面给出运算符与api的对应表（其实看名字大概都能猜出来）：

符号	含义	函数名
+	加	PyNumber_Add
-	减	PyNumber_Subtract
*	乘	PyNumber_Multiply
/	除	__Pyx_PyNumber_Divide
//	整除	PyNumber_FloorDivide
%	取模	PyNumber_Remainder
**	乘方	PyNumber_Power
&	按位与	PyNumber_And
|	按位或	PyNumber_Or
^	按位异或	PyNumber_Xor
~	按位取非	PyNumber_Invert
>>	右移	PyNumber_Rshift
<<	左移	PyNumber_Lshift

这里单独看一下位移在ida中的体现

c 复制代码

v24 = off_1800095B8[32];
  if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] )
  {
    v27 = PyNumber_Rshift(v4, off_1800095B8[32]);
LABEL_35:
    v4 = v27;
    goto LABEL_36;
  }
  v25 = *(_QWORD *)(v4 + 16);
  if ( v25 )
  {
    if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
    {
      v26 = v25 + 4;
      switch ( v26 )
      {
        case 2i64:
          v27 = PyLong_FromLongLong(
                  -(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
                  v26,
                  v24,
                  0x180000000ui64);
          break;
        case 6i64:
          v27 = PyLong_FromLongLong(
                  (__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
                  v26,
                  v24,
                  0x180000000ui64);
          break;
        default:
          v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);
          break;
      }
    }
    else
    {
      v28 = -*(_DWORD *)(v4 + 24);
      if ( v25 >= 0 )
        v28 = *(_DWORD *)(v4 + 24);
      v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
    }
    goto LABEL_35;
  }
  ++*(_QWORD *)v4;
LABEL_36:
  if ( !v4 )
  {
    v12 = 2534i64;
    v13 = 13i64;
    goto LABEL_58;
  }
  v10 = (_QWORD *)v4;

off_1800095B8[32]中储存的就是4。这里Cython生成的代码在处理整数时做了一些安全检查（先判断对象类型，再按整数的大小分情况处理）。在else分支的 PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64); 中，也可以直接看到右移了多少位。

问题是，这里的主要路径上好像没有走表格中的PyNumber_Rshift（它只出现在开头x不是int类型时的回退分支里）？因为py源代码中位移的位数是立即数，所以对于int类型，直接转换为c语言的位移运算符就好了。但是如果是x>>y这样两个操作数都是变量，就会直接调用api PyNumber_Rshift。

in/not in 运算符

c 复制代码

 /* "test.py":21
 * def test2(x, y):
 *     #   in/not in
 *     if x in y:             # <<<<<<<<<<<<<<
 *         x = y
 *     elif x not in y:
 */
  __pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 21, __pyx_L1_error)
      
      。。。。。。
      
 /* "test.py":23
 *     if x in y:
 *         x = y
 *     elif x not in y:             # <<<<<<<<<<<<<<
 *         y = x
 *     print(x, y)
 */
  __pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 23, __pyx_L1_error)

这里涉及到一些条件语句的转换，不过没关系，照样能看懂

在c代码中可以看到无论是in还是 not in 调用的都是函数__Pyx_PySequence_ContainsTF。其前两个参数是前后两个参与运算的变量，而第三个参数Py_EQ/Py_NE则决定当前运算到底是in还是 not in

不幸的是，无论是in还是not in ，在ida中都是PySequence_Contains，具体是哪个要结合上下文分析。比如这里v5 = PySequence_Contains(a3) 判断的是 a3 中是否包含 a2。如果 v5 == 1，表示 a2 在 a3 中，则进入接下来的操作（++*v3 和调整 v4 和 v3 的指向）。

而下面那个v9 = PySequence_Contains(v3) 判断的是 v3 中是否包含 v4，对应 v4 not in v3：如果 v9 == 0，表示 v4 不在 v3 中，符合 not in 的语义。

说人话就是看后续是对PySequence_Contains的返回值和谁比较（1或者0）。

==运算符与逻辑运算符

逻辑与运算符的处理

c 复制代码

  /* "test.py":30
 * def test3(x, y):
 *     #  ==
 *     print(x == 0 and y == 0)             # <<<<<<<<<<<<<<
 *     print(y == 0 or x == 0)
 *     print(not x==0)
 */
  __pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 30, __pyx_L1_error)
  if (__pyx_t_3) {
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  } else {
    __Pyx_INCREF(__pyx_t_2);
    __pyx_t_1 = __pyx_t_2;
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    goto __pyx_L3_bool_binop_done;
  }
  __pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_t_2);
  __pyx_t_1 = __pyx_t_2;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_L3_bool_binop_done:;

__Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0): 这行代码将 x == 0 的比较操作转换为 C 语言函数。它检查 x 是否等于 0。（猜测不同类型的==有对应的函数，暂未验证）。

ida中比较==0的部分，看得出来它把变量分为int float 和其他三种情况，除了整数和浮点，一概用PyObject_RichCompare比较。

在 C 代码中，and 逻辑运算符的处理是短路的：如果第一个条件为 False，第二个条件就不会被计算。对应到这里，只有在 x == 0 为 True 时，编译后的代码才会继续执行 y == 0 的检查；否则直接把 x == 0 的结果作为整个表达式的结果，并跳转到 __pyx_L3_bool_binop_done。

然后 __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0) 检查 y == 0，把比较结果存入 __pyx_t_2，再赋给 __pyx_t_1 作为整个 and 表达式的结果。

ida中对and的处理也差不多类似。看着有点恶心，全是if else条件分支和各种goto

逻辑或运算符的处理

c 复制代码

 /* "test.py":31
 *     #  ==
 *     print(x == 0 and y == 0)
 *     print(y == 0 or x == 0)             # <<<<<<<<<<<<<<
 *     print(not x==0)
 * 
 */
  __pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 31, __pyx_L1_error)
  if (!__pyx_t_3) {
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  } else {
    __Pyx_INCREF(__pyx_t_1);
    __pyx_t_2 = __pyx_t_1;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    goto __pyx_L5_bool_binop_done;
  }
  __pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_INCREF(__pyx_t_1);
  __pyx_t_2 = __pyx_t_1;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_L5_bool_binop_done:;

前面都是在处理== ：__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0): 检查 y == 0，即比较 y 是否等于 0。__Pyx_PyObject_IsTrue(__pyx_t_1): 将 __pyx_t_1 转换为布尔值，存入 __pyx_t_3。如果 y == 0 成立（即 __pyx_t_3 为 True），就把 __pyx_t_1（存储 y == 0 的结果）赋给 __pyx_t_2，并直接跳到 __pyx_L5_bool_binop_done。

在执行 or 运算时，短路操作符同样会起作用：如果 y == 0 为 True，则 x == 0 的比较不会被执行，结果会直接为 True。__pyx_t_2 保存了 y == 0 或 x == 0 的结果，它将作为最终的结果传递给 print 函数。

逻辑非运算符的处理

c 复制代码

  /* "test.py":32
 *     print(x == 0 and y == 0)
 *     print(y == 0 or x == 0)
 *     print(not x==0)             # <<<<<<<<<<<<<<
 * 
 * 
 */
  __pyx_t_3 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 32, __pyx_L1_error)
  __pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_3)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_print, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

!__pyx_t_3 ：这行代码计算 not x == 0。由于 __pyx_t_3 是 x == 0 的布尔值，!__pyx_t_3 就是其逻辑取反。__Pyx_PyBool_FromLong((!__pyx_t_3)) 将 !__pyx_t_3 转换为 Python 的布尔对象。如果 !__pyx_t_3 为 0，则返回 False；如果 !__pyx_t_3 为 1，则返回 True。

如果以后逆向在这里出题，考察逻辑运算符，那就认命吧，这里反编译出的代码很绕。

下面粘上test3函数的反编译代码。

c 复制代码

// write access to const memory has been detected, the output may be wrong!
// Decompiled test3(x, y); the traceback call at LABEL_75 names it "test.test3".
// Going by the Python line numbers stored in v8 (30, 31, 32) and the order of the
// comparisons, a2 appears to be x and a3 appears to be y.
// Every failure path ends at LABEL_75 and returns 0; the success path returns Py_NoneStruct.
// Local variable declarations are not part of this listing.
__int64 __fastcall sub_180001E30(__int64 a1, __int64 a2, __int64 a3)
{
  // ---- test.py line 30: print(x == 0 and y == 0) ----
  // v5 is the constant a2 is compared against; presumably the cached int 0 (TODO confirm).
  v5 = *((_QWORD *)off_18000B688 + 35);
  // Same object as the constant: treated as equal without further checks.
  if ( a2 == v5 )
    goto LABEL_2;
  // Type pointer of a2, used to choose between the int, float and generic paths.
  v7 = *(_QWORD *)(a2 + 8);
  if ( v7 == PyLong_Type )
  {
    // Exact int: a non-zero field at +16 (presumably ob_size) means the value is not 0.
    if ( *(_QWORD *)(a2 + 16) )
    {
LABEL_5:
      // Not equal: the result is Py_False; the ++ is presumably the decompiler's view of Py_INCREF.
      v6 = (_QWORD *)++Py_FalseStruct;
      goto LABEL_10;
    }
LABEL_2:
    // Equal: the result is Py_True (same presumed INCREF rendering).
    v6 = (_QWORD *)++Py_TrueStruct;
    goto LABEL_10;
  }
  if ( v7 == PyFloat_Type )
  {
    // Exact float: compare the double stored at +16 with 0.0.
    if ( *(double *)(a2 + 16) != 0.0 )
      goto LABEL_5;
    goto LABEL_2;
  }
  // Any other type: generic rich comparison; op 2 is Py_EQ in CPython's object.h.
  v6 = (_QWORD *)PyObject_RichCompare(a2, v5, 2LL);
LABEL_10:
  if ( !v6 )
  {
    // Comparison failed. v8 holds the test.py line; v9 is presumably the generated-C line.
    v8 = 30;
    v9 = 3136;
LABEL_75:
    // Shared error exit; sub_180005F50 is presumably __Pyx_AddTraceback (TODO confirm).
    sub_180005F50("test.test3", v9, v8, (__int64)"test.py");
    return 0LL;
  }
  // Truthiness of the first operand: identity checks against True/None/False first,
  // PyObject_IsTrue only for any other object.
  IsTrue = v6 == (_QWORD *)Py_TrueStruct;
  v11 = v6 == (_QWORD *)Py_NoneStruct;
  v12 = IsTrue | v11 | (unsigned int)(v6 == (_QWORD *)Py_FalseStruct);
  if ( !(IsTrue | (v11 || v6 == (_QWORD *)Py_FalseStruct)) )
    IsTrue = PyObject_IsTrue(v6);
  if ( IsTrue < 0 )
  {
    v8 = 30;
    v9 = 3138;
    goto LABEL_73;
  }
  v13 = *v6;
  if ( !IsTrue )
  {
    // Short-circuit: the first operand is falsy, so it is the result of `and`.
    // The value at *v6 (presumably the refcount) is written back unchanged.
    *v6 = v13;
    v16 = v6;
    if ( v13 )
      goto LABEL_26;
    v18 = v6;
    goto LABEL_25;
  }
  // First operand is truthy: release it (decrement, free at zero) and evaluate the second operand.
  v14 = v13 - 1;
  *v6 = v14;
  if ( !v14 )
    Py_Dealloc(v6);
  // a3 compared with the same constant; sub_180004780 is presumably the out-of-line == helper.
  v15 = (_QWORD *)sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));
  v16 = v15;
  if ( !v15 )
  {
    v8 = 30;
    v9 = 3147;
    goto LABEL_75;
  }
  v17 = *v15;
  *v16 = v17;
  if ( !v17 )
  {
    v18 = v16;
LABEL_25:
    Py_Dealloc(v18);
  }
LABEL_26:
  v6 = v16;
  // v16 is the value of the whole `and` expression; sub_1800048D0 presumably calls print with it.
  v19 = (_QWORD *)sub_1800048D0(v12, v16);
  if ( !v19 )
  {
    v8 = 30;
    v9 = 3153;
    if ( !v6 )
      goto LABEL_75;
LABEL_73:
    // Error exit that first releases v6 (decrement, free when the old value was 1).
    v20 = (*v6)-- == 1LL;
    if ( v20 )
      Py_Dealloc(v6);
    goto LABEL_75;
  }
  // Release the printed value and the return value of the call.
  v20 = (*v16)-- == 1LL;
  if ( v20 )
    Py_Dealloc(v16);
  v20 = (*v19)-- == 1LL;
  if ( v20 )
    Py_Dealloc(v19);
  // ---- test.py line 31: print(y == 0 or x == 0) ----
  // First operand: a3 compared with the constant.
  v21 = sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));
  v6 = (_QWORD *)v21;
  if ( !v21 )
  {
    v8 = 31;
    v9 = 3165;
    goto LABEL_75;
  }
  // Truthiness of the first operand; sub_180006570 is presumably the out-of-line truth test.
  v22 = sub_180006570(v21);
  v23 = (unsigned int)v22;
  if ( v22 < 0 )
  {
    v8 = 31;
    v9 = 3167;
    goto LABEL_73;
  }
  v24 = *v6;
  if ( !(_DWORD)v23 )
  {
    // First operand is falsy: release it and evaluate the second operand (a2 against the constant).
    v25 = v24 - 1;
    *v6 = v25;
    if ( !v25 )
      Py_Dealloc(v6);
    v26 = (_QWORD *)sub_180004780(a2, *((_QWORD *)off_18000B688 + 35));
    v6 = v26;
    if ( !v26 )
    {
      v8 = 31;
      v9 = 3176;
      goto LABEL_75;
    }
    v24 = *v26;
  }
  // When the first operand was truthy, the block above is skipped and v6 still holds it
  // (short-circuit for `or`). Either way v6 is the value of the whole expression.
  *v6 = v24;
  if ( !v24 )
    Py_Dealloc(v6);
  v28 = (_QWORD *)sub_1800048D0(v23, v6);
  if ( !v28 )
  {
    v8 = 31;
    v9 = 3182;
    if ( !v6 )
      goto LABEL_75;
    goto LABEL_73;
  }
  v20 = (*v6)-- == 1LL;
  if ( v20 )
    Py_Dealloc(v6);
  v20 = (*v28)-- == 1LL;
  if ( v20 )
    Py_Dealloc(v28);
  // ---- test.py line 32: print(not x==0) ----
  // Same int / float / generic comparison of a2 as for line 30, but the outcome is a C
  // truth value in v31 (-1 signals an error) instead of a Python object.
  v29 = *((_QWORD *)off_18000B688 + 35);
  if ( a2 == v29 )
    goto LABEL_68;
  v30 = *(_QWORD *)(a2 + 8);
  if ( v30 == PyLong_Type )
  {
    v31 = *(_QWORD *)(a2 + 16) == 0LL;
  }
  else if ( v30 == PyFloat_Type )
  {
    if ( *(double *)(a2 + 16) == 0.0 )
      goto LABEL_68;
    v31 = 0;
  }
  else
  {
    // Generic path: rich comparison with op 2 (Py_EQ), then the same truthiness shortcut as above.
    v32 = PyObject_RichCompare(a2, v29, 2LL);
    v33 = (_QWORD *)v32;
    if ( v32 )
    {
      v31 = v32 == Py_TrueStruct;
      v34 = v32 == Py_NoneStruct;
      v27 = v31 | v34 | (unsigned int)(v33 == (_QWORD *)Py_FalseStruct);
      if ( !(v31 | (v34 || v33 == (_QWORD *)Py_FalseStruct)) )
        v31 = PyObject_IsTrue(v33);
      v20 = (*v33)-- == 1LL;
      if ( v20 )
        Py_Dealloc(v33);
    }
    else
    {
      v31 = -1;
    }
  }
  if ( v31 < 0 )
  {
    v8 = 32;
    v9 = 3194;
    goto LABEL_75;
  }
  if ( !v31 )
  {
    // a2 does not equal the constant, so `not x==0` is True.
    v6 = (_QWORD *)++Py_TrueStruct;
    goto LABEL_69;
  }
LABEL_68:
  // a2 equals the constant, so `not x==0` is False.
  v6 = (_QWORD *)++Py_FalseStruct;
LABEL_69:
  if ( !v6 )
  {
    v8 = 32;
    v9 = 3195;
    goto LABEL_75;
  }
  v35 = (_QWORD *)sub_1800048D0(v27, v6);
  if ( !v35 )
  {
    v8 = 32;
    v9 = 3197;
    goto LABEL_73;
  }
  v20 = (*v6)-- == 1LL;
  if ( v20 )
    Py_Dealloc(v6);
  v20 = (*v35)-- == 1LL;
  if ( v20 )
    Py_Dealloc(v35);
  // Presumably Py_INCREF(Py_None); return Py_None, rendered by the decompiler as a post-increment.
  return Py_NoneStruct++;
}