python基本数据结构源码分享

一、搭建源码环境

  • 进入 GitHub 地址:github.com/python/cpython ,克隆出自己需要阅读的 Python 版本(对应的分支或 tag)
  • 软件要求:visual studio
  • 双击打开克隆路径下的PCbuild/pcbuild.sln
  • 打开界面如下:主要的源码部分在pythoncore目录

二、python 对象数据结构

2.1 python 一切都是对象

今天涉及到的对象为:PyObject(所有对象共有的头部,定长对象)、PyTypeObject(类型对象)、PyVarObject(变长对象,比 PyObject 多一个元素个数 ob_size)。注意这里的"定长/变长"指对象占用内存的大小是否随元素个数变化,与对象是否可变(mutable)无关。

2.2 PyObject 数据结构

PyObject的数据结构如下:

swift 复制代码
struct _object {
    _PyObject_HEAD_EXTRA   # 对象链
    Py_ssize_t ob_refcnt;  # 引用的数量
    PyTypeObject *ob_type;  # 对象的类型
};

/* Extra header fields that link an object to its neighbours in the object
 * chain.
 * NOTE(review): upstream CPython appears to define this as non-empty only in
 * Py_TRACE_REFS builds -- confirm against Include/object.h. */
#define _PyObject_HEAD_EXTRA                                          \
    PyObject *_ob_next;    /* address of the next object */           \
    PyObject *_ob_prev;    /* address of the previous object */

/* Py_ssize_t is declared here as an int-based signed type.
 * NOTE(review): this looks like the 32-bit Windows definition; other
 * platforms presumably use a wider type -- confirm against pyconfig.h. */
typedef _W64 int Py_ssize_t;
/* Type object: a table of slots holding the operations a type supports.
 * It can be read as the set of default object operations; concrete types
 * fill in the slots they implement.  The structure starts with
 * PyObject_VAR_HEAD, i.e. a type object carries the variable-size header. */
struct _typeobject {
    PyObject_VAR_HEAD
    const char *tp_name; /* For printing, in format "<module>.<name>" */
    Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */

    /* Methods to implement standard operations */

    destructor tp_dealloc;
    Py_ssize_t tp_vectorcall_offset;
    getattrfunc tp_getattr;
    setattrfunc tp_setattr;
    PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
                                    or tp_reserved (Python 3) */
    reprfunc tp_repr;

    /* Method suites for standard classes */

    PyNumberMethods *tp_as_number;
    PySequenceMethods *tp_as_sequence;
    PyMappingMethods *tp_as_mapping;

    /* More standard operations (here for binary compatibility) */

    hashfunc tp_hash;
    ternaryfunc tp_call;
    reprfunc tp_str;
    getattrofunc tp_getattro;
    setattrofunc tp_setattro;

    /* Functions to access object as input/output buffer */
    PyBufferProcs *tp_as_buffer;

    /* Flags to define presence of optional/expanded features */
    unsigned long tp_flags;

    const char *tp_doc; /* Documentation string */

    /* Assigned meaning in release 2.0 */
    /* call function for all accessible objects */
    traverseproc tp_traverse;

    /* delete references to contained objects */
    inquiry tp_clear;

    /* Assigned meaning in release 2.1 */
    /* rich comparisons */
    richcmpfunc tp_richcompare;

    /* weak reference enabler */
    Py_ssize_t tp_weaklistoffset;

    /* Iterators */
    getiterfunc tp_iter;
    iternextfunc tp_iternext;

    /* Attribute descriptor and subclassing stuff */
    PyMethodDef *tp_methods;
    PyMemberDef *tp_members;
    PyGetSetDef *tp_getset;
    // Strong reference on a heap type, borrowed reference on a static type
    PyTypeObject *tp_base;
    PyObject *tp_dict;
    descrgetfunc tp_descr_get;
    descrsetfunc tp_descr_set;
    Py_ssize_t tp_dictoffset;
    initproc tp_init;
    allocfunc tp_alloc;
    newfunc tp_new;
    freefunc tp_free; /* Low-level free-memory routine */
    inquiry tp_is_gc; /* For PyObject_IS_GC */
    PyObject *tp_bases;
    PyObject *tp_mro; /* method resolution order */
    PyObject *tp_cache; /* no longer used */
    void *tp_subclasses;  /* for static builtin types this is an index */
    PyObject *tp_weaklist; /* not used for static builtin types */
    destructor tp_del;

    /* Type attribute cache version tag. Added in version 2.6 */
    unsigned int tp_version_tag;

    destructor tp_finalize;
    vectorcallfunc tp_vectorcall;

    /* bitset of which type-watchers care about this type */
    char tp_watched;
};

2.3 PyVarObject 对象

arduino 复制代码
/* Header for variable-size objects: the common PyObject header followed by
 * an item count. */
typedef struct {
        PyObject ob_base;   /* the common object header */
        Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;

说明:相对于 PyObject 来说,PyVarObject 只多了一个 ob_size 字段,用来记录可变部分的元素个数。

三、 基本数据结构

3.1 long 类型

3.1.1 数据结构

ini 复制代码
/* Integer object: the common object header plus the value payload. */
struct _longobject {
    PyObject_HEAD
    _PyLongValue long_value;  /* the extra member: holds the integer's value */
};

/* Value part of an integer object: an item count and the digit array. */
typedef struct _PyLongValue {
    Py_ssize_t ob_size; /* Number of items in variable part */
      digit ob_digit[1]; /* digit array, declared with a single element */
} _PyLongValue;

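计算公式(整数的绝对值):SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i),整数的正负由 ob_size 的符号决定。
理解:abs(ob_size) 是 digit 的个数;每个 digit 只使用 PyLong_SHIFT(此处为 30)个有效位。ob_size 为 1 时可表示 30 位的整数,ob_size 为 2 时可表示 60 位,依此类推。位数随 ob_size 增长,因此 Python 的整数基本不存在溢出问题。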

3.1.2 常量池

scss 复制代码
/*
 * Return a new reference to the preallocated object for the small integer
 * ival.  The caller must pass a value for which IS_SMALL_INT(ival) holds;
 * this is only checked by an assert.
 */
static PyObject *
get_small_int(sdigit ival)
{
    assert(IS_SMALL_INT(ival));
    /* Small integers are shared: look the object up in the preallocated
       table (offset by the number of negative entries) and hand out a new
       reference instead of creating a new object. */
    PyObject *v = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival];
    return Py_NewRef(v);
}

/* Accessor for the small-integer table, reached through _Py_SINGLETON. */
#define _PyLong_SMALL_INTS _Py_SINGLETON(small_ints)

/* Count of preallocated non-negative small integers (0 up to 256). */
#define _PY_NSMALLPOSINTS           257
/* Count of preallocated negative small integers (-5 up to -1). */
#define _PY_NSMALLNEGINTS           5

/* Small integers are preallocated in this array so that they
 * can be shared.
 * The integers that are preallocated are those in the range
 * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
 */
PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];

3.1.3 加法操作

ini 复制代码
/* Each digit carries PyLong_SHIFT bits: PyLong_BASE is 2**PyLong_SHIFT and
 * PyLong_MASK keeps the low PyLong_SHIFT bits of a value. */
#define PyLong_BASE     ((digit)1 << PyLong_SHIFT)
#define PyLong_MASK     ((digit)(PyLong_BASE - 1))
#define PyLong_SHIFT    30

/*
 * Add the absolute values of two integers, digit by digit.
 *
 * Returns a new integer object holding |a| + |b| (normalized), or NULL if
 * the result object could not be allocated.
 */
static PyLongObject *
x_add(PyLongObject *a, PyLongObject *b)
{
    /* Work with the absolute value of each size (the digit counts). */
    Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b));
    PyLongObject *z;
    Py_ssize_t i;
    digit carry = 0;

    /* Ensure a is the operand with more digits. */
    if (size_a < size_b) {
        { PyLongObject *temp = a; a = b; b = temp; }
        { Py_ssize_t size_temp = size_a;
            size_a = size_b;
            size_b = size_temp; }
    }
    /* Reserve one extra digit for a possible final carry. */
    z = _PyLong_New(size_a+1);
    if (z == NULL)
        return NULL;
    /* Add the digits both operands have in common, low to high. */
    for (i = 0; i < size_b; ++i) {
        carry += a->long_value.ob_digit[i] + b->long_value.ob_digit[i];
        /* Masking keeps only the low PyLong_SHIFT (30) bits as the digit. */
        z->long_value.ob_digit[i] = carry & PyLong_MASK;
        /* What remains above those bits is the carry into the next digit. */
        carry >>= PyLong_SHIFT;
    }
    /* Propagate the carry through the remaining high digits of a. */
    for (; i < size_a; ++i) {
        carry += a->long_value.ob_digit[i];
        z->long_value.ob_digit[i] = carry & PyLong_MASK;
        carry >>= PyLong_SHIFT;
    }
    /* The last carry goes into the extra digit; long_normalize drops it
       again when it is zero. */
    z->long_value.ob_digit[i] = carry;
    return long_normalize(z);
}

/*
 * Drop zero digits from the most-significant end of v by shrinking its size
 * field.  The sign of the size is preserved.  Returns v itself.
 */
static PyLongObject *
long_normalize(PyLongObject *v)
{
    const Py_ssize_t ndigits = Py_ABS(Py_SIZE(v));
    Py_ssize_t used = ndigits;

    /* Walk down from the top digit until a non-zero digit is found. */
    for (; used > 0; --used) {
        if (v->long_value.ob_digit[used - 1] != 0) {
            break;
        }
    }
    if (used == ndigits) {
        /* Nothing to trim: leave the size field untouched. */
        return v;
    }
    if (Py_SIZE(v) < 0) {
        Py_SET_SIZE(v, -used);
    }
    else {
        Py_SET_SIZE(v, used);
    }
    return v;
}

将 Python 整数转换为 C long 类型值的计算过程(PyLong_AsLongAndOverflow):
/*
 * Convert a Python integer object to a C long, reporting overflow.
 *
 * vv:        the object to convert.  If PyLong_Check(vv) fails, it is first
 *            converted with _PyNumber_Index and the temporary result is
 *            released before returning.
 * overflow:  out-parameter.  Set to 0 on entry; set to +1 or -1 (the sign of
 *            the value) when the value does not fit in a long.
 *
 * Returns the converted value.  Returns -1 when vv is NULL (after calling
 * PyErr_BadInternalCall), when _PyNumber_Index fails, or when *overflow is
 * set to a non-zero value.
 */
long
PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)
{
    /* This version by Tim Peters */
    PyLongObject *v;
    unsigned long x, prev;
    long res;
    Py_ssize_t i;
    int sign;
    int do_decref = 0; /* if PyNumber_Index was called */

    *overflow = 0;
    if (vv == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }

    if (PyLong_Check(vv)) {
        v = (PyLongObject *)vv;
    }
    else {
        v = (PyLongObject *)_PyNumber_Index(vv);
        if (v == NULL)
            return -1;
        do_decref = 1;
    }

    res = -1;
    i = Py_SIZE(v);

    /* Sizes -1, 0 and 1 are handled directly from the single digit. */
    switch (i) {
    case -1:
        res = -(sdigit)v->long_value.ob_digit[0];
        break;
    case 0:
        res = 0;
        break;
    case 1:
        res = v->long_value.ob_digit[0];
        break;
    default:
        sign = 1;
        x = 0;
        if (i < 0) {
            sign = -1;
            i = -(i);
        }
        /* Accumulate digits from most to least significant.  If shifting
           the new value back does not give the previous one, bits were
           lost and the value does not fit. */
        while (--i >= 0) {
            prev = x;
            x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i];
            if ((x >> PyLong_SHIFT) != prev) {
                *overflow = sign;
                goto exit;
            }
        }
        /* Haven't lost any bits, but casting to long requires extra
         * care (see comment above).
         */
        if (x <= (unsigned long)LONG_MAX) {
            res = (long)x * sign;
        }
        else if (sign < 0 && x == PY_ABS_LONG_MIN) {
            /* The magnitude of LONG_MIN exceeds LONG_MAX, so it is
               handled as a special case. */
            res = LONG_MIN;
        }
        else {
            *overflow = sign;
            /* res is already set to -1 */
        }
    }
  exit:
    /* Release the temporary created by _PyNumber_Index, if any. */
    if (do_decref) {
        Py_DECREF(v);
    }
    return res;
}

3.2 list数据类型

3.2.1 list数据结构

csharp 复制代码
/* List object: a variable-size header plus a separately allocated array of
 * element pointers. */
typedef struct {
    /* Variable-size header; its ob_size is the number of elements
       currently in use. */
    PyObject_VAR_HEAD
    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    PyObject **ob_item;

    /* ob_item contains space for 'allocated' elements.  The number
     * currently in use is ob_size.
     * Invariants:
     *     0 <= ob_size <= allocated
     *     len(list) == ob_size
     *     ob_item == NULL implies ob_size == allocated == 0
     * list.sort() temporarily sets allocated to -1 to detect mutations.
     *
     * Items must normally not be NULL, except during construction when
     * the list is not yet visible outside the function that builds it.
     */
    /* Capacity: the number of element slots ob_item has room for. */
    Py_ssize_t allocated;
} PyListObject;

3.2.2 插入操作

ini 复制代码
/*
 * Insert v into the list before index `where`.
 *
 * A negative `where` counts from the end of the list; positions outside the
 * list are clamped to the first or last position.  The list takes a new
 * reference to v.
 *
 * Returns 0 on success.  Returns -1 if v is NULL (after calling
 * PyErr_BadInternalCall) or if growing the list fails.
 */
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
    /* n is the current number of elements. */
    Py_ssize_t i, n = Py_SIZE(self);
    PyObject **items;
    if (v == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }
    /* The size after insertion must stay below PY_SSIZE_T_MAX. */
    assert((size_t)n + 1 < PY_SSIZE_T_MAX);
    /* Grow the list by one element. */
    if (list_resize(self, n+1) < 0)
        return -1;
    if (where < 0) {
        /* Translate a negative index into a forward position:
           n - abs(where). */
        where += n;
        if (where < 0)
            /* Still negative: insert at the front. */
            where = 0;
    }
    /* Past the end: insert after the last element. */
    if (where > n)
        where = n;
    items = self->ob_item;
    /* Shift the elements at and after `where` one slot to the right. */
    for (i = n; --i >= where; )
        items[i+1] = items[i];
    /* Store v in the freed slot, taking a new reference to it. */
    items[where] = Py_NewRef(v);
    return 0;
}

/*
 * Set the list's size to newsize, reallocating the item array when the
 * current allocation is too small or more than twice as large as needed.
 *
 * Returns 0 on success.  On failure (the allocation failed or the requested
 * element count is too large) calls PyErr_NoMemory and returns -1 without
 * modifying the list's fields.
 */
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    PyObject **items;
    size_t new_allocated, num_allocated_bytes;
    Py_ssize_t allocated = self->allocated;

    /* Bypass realloc() when a previous overallocation is large enough
       to accommodate the newsize.  If the newsize falls lower than half
       the allocated size, then proceed with the realloc() to shrink the list.
    */
    if (allocated >= newsize && newsize >= (allocated >> 1)) {
        assert(self->ob_item != NULL || newsize == 0);
        Py_SET_SIZE(self, newsize);
        return 0;
    }

    /* This over-allocates proportional to the list size, making room
     * for additional growth.  The over-allocation is mild, but is
     * enough to give linear-time amortized behavior over a long
     * sequence of appends() in the presence of a poorly-performing
     * system realloc().
     * Add padding to make the allocated size multiple of 4.
     * The growth pattern is:  0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
     * Note: new_allocated won't overflow because the largest possible value
     *       is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
     */
    /* newsize plus one eighth of it plus 6, rounded down to a multiple of 4. */
    new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;
    /* Do not overallocate if the new size is closer to overallocated size
     * than to the old size.
     */
    if (newsize - Py_SIZE(self) > (Py_ssize_t)(new_allocated - newsize))
        new_allocated = ((size_t)newsize + 3) & ~(size_t)3;

    /* An empty list keeps no capacity at all. */
    if (newsize == 0)
        new_allocated = 0;
    /* Only compute the byte count when it cannot exceed PY_SSIZE_T_MAX. */
    if (new_allocated <= (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        num_allocated_bytes = new_allocated * sizeof(PyObject *);
        items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
    }
    else {
        // integer overflow
        items = NULL;
    }
    if (items == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->ob_item = items;
    Py_SET_SIZE(self, newsize);
    self->allocated = new_allocated;
    return 0;
}

参考:深入理解 Python 虚拟机:整型(int)的实现原理及源码剖析 - 知乎 (zhihu.com)

参考链接:docs.python.org/zh-cn/3/

相关推荐
databook3 小时前
Manim实现闪光轨迹特效
后端·python·动效
Juchecar4 小时前
解惑:NumPy 中 ndarray.ndim 到底是什么?
python
用户8356290780515 小时前
Python 删除 Excel 工作表中的空白行列
后端·python
Json_5 小时前
使用python-fastApi框架开发一个学校宿舍管理系统-前后端分离项目
后端·python·fastapi
数据智能老司机11 小时前
精通 Python 设计模式——分布式系统模式
python·设计模式·架构
数据智能老司机12 小时前
精通 Python 设计模式——并发与异步模式
python·设计模式·编程语言
数据智能老司机12 小时前
精通 Python 设计模式——测试模式
python·设计模式·架构
数据智能老司机12 小时前
精通 Python 设计模式——性能模式
python·设计模式·架构
c8i13 小时前
drf初步梳理
python·django
每日AI新事件13 小时前
python的异步函数
python