python基本数据结构源码分享

一、搭建源码环境

  • 进入 GitHub 地址:github.com/python/cpython ,克隆出自己需要看的 Python 版本
  • 软件要求:visual studio
  • 双击打开克隆路径下的PCbuild/pcbuild.sln
  • 打开界面如下:主要的源码部分在pythoncore目录

二、python 对象数据结构

2.1 python 一切都是对象

今天涉及到的对象为:PyObject(定长对象的公共头部)、PyTypeObject(类型对象)、PyVarObject(变长对象的公共头部,比 PyObject 多一个 ob_size)。注意这里的“定长/变长”指的是对象占用内存的大小是否固定,与 Python 层面的“可变/不可变对象”不是一回事。

2.2 PyObject 数据结构

PyObject的数据结构如下:

swift 复制代码
struct _object {
    _PyObject_HEAD_EXTRA   # 对象链
    Py_ssize_t ob_refcnt;  # 引用的数量
    PyTypeObject *ob_type;  # 对象的类型
};

/* Links that chain objects together: _ob_next holds the address of the
 * following object and _ob_prev the address of the preceding one.
 * NOTE(review): upstream CPython only expands this to real fields in
 * Py_TRACE_REFS builds -- confirm against the version being read. */
#define _PyObject_HEAD_EXTRA            \
    PyObject *_ob_next;                 \
    PyObject *_ob_prev;

/* Py_ssize_t is an alias for (a _W64-qualified) int in this configuration.
 * NOTE(review): the underlying type is platform dependent -- confirm for
 * the build being inspected. */
typedef _W64 int Py_ssize_t;
/* _typeobject 可以理解为类型的“方法表”:其中的 tp_* 槽位对应对象的各种默认操作,具体类型按需实现其中的一部分 */
/* Layout of a type object.  It starts with PyObject_VAR_HEAD, followed by
 * the type's printable name and allocation sizes, and then a table of
 * tp_* slots: function pointers and method suites for the standard
 * operations, plus bookkeeping fields (flags, base/bases/mro, dict,
 * offsets, version tag, ...). */
struct _typeobject {
    PyObject_VAR_HEAD  
    const char *tp_name; /* For printing, in format "<module>.<name>" */
    Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */

    /* Methods to implement standard operations */

    destructor tp_dealloc;
    Py_ssize_t tp_vectorcall_offset;
    getattrfunc tp_getattr;
    setattrfunc tp_setattr;
    PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
                                    or tp_reserved (Python 3) */
    reprfunc tp_repr;

    /* Method suites for standard classes */

    PyNumberMethods *tp_as_number;
    PySequenceMethods *tp_as_sequence;
    PyMappingMethods *tp_as_mapping;

    /* More standard operations (here for binary compatibility) */

    hashfunc tp_hash;
    ternaryfunc tp_call;
    reprfunc tp_str;
    getattrofunc tp_getattro;
    setattrofunc tp_setattro;

    /* Functions to access object as input/output buffer */
    PyBufferProcs *tp_as_buffer;

    /* Flags to define presence of optional/expanded features */
    unsigned long tp_flags;

    const char *tp_doc; /* Documentation string */

    /* Assigned meaning in release 2.0 */
    /* call function for all accessible objects */
    traverseproc tp_traverse;

    /* delete references to contained objects */
    inquiry tp_clear;

    /* Assigned meaning in release 2.1 */
    /* rich comparisons */
    richcmpfunc tp_richcompare;

    /* weak reference enabler */
    Py_ssize_t tp_weaklistoffset;

    /* Iterators */
    getiterfunc tp_iter;
    iternextfunc tp_iternext;

    /* Attribute descriptor and subclassing stuff */
    PyMethodDef *tp_methods;
    PyMemberDef *tp_members;
    PyGetSetDef *tp_getset;
    // Strong reference on a heap type, borrowed reference on a static type
    PyTypeObject *tp_base;
    PyObject *tp_dict;
    descrgetfunc tp_descr_get;
    descrsetfunc tp_descr_set;
    Py_ssize_t tp_dictoffset;
    initproc tp_init;
    allocfunc tp_alloc;
    newfunc tp_new;
    freefunc tp_free; /* Low-level free-memory routine */
    inquiry tp_is_gc; /* For PyObject_IS_GC */
    PyObject *tp_bases;
    PyObject *tp_mro; /* method resolution order */
    PyObject *tp_cache; /* no longer used */
    void *tp_subclasses;  /* for static builtin types this is an index */
    PyObject *tp_weaklist; /* not used for static builtin types */
    destructor tp_del;

    /* Type attribute cache version tag. Added in version 2.6 */
    unsigned int tp_version_tag;

    destructor tp_finalize;
    vectorcallfunc tp_vectorcall;

    /* bitset of which type-watchers care about this type */
    char tp_watched;
};

2.3 PyVarObject 对象

arduino 复制代码
/* Header for objects that carry a variable-length part: the plain object
 * header (ob_base) followed by the item count of that variable part. */
typedef struct {
        PyObject ob_base;   /* the common object header comes first */
        Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;

/* 相对于 PyObject 来说,PyVarObject 只多了一个 ob_size 字段,用来记录可变部分的元素个数 */

三、 基本数据结构

3.1 long 类型

3.1.1 数据结构

ini 复制代码
/* Python int object: the common object header plus the integer payload. */
struct _longobject {
    PyObject_HEAD
    _PyLongValue long_value;  /* the extra member compared with a bare header: size + digits */
};

/* Payload of an int: an item count followed by the digit array.
 * ob_digit is declared with one element here.
 * NOTE(review): presumably over-allocated to hold all the digits in
 * practice -- confirm against _PyLong_New. */
typedef struct _PyLongValue {
    Py_ssize_t ob_size; /* Number of items in variable part */
      digit ob_digit[1];
} _PyLongValue;

计算公式(整数的绝对值):$|v| = \sum_{i=0}^{|ob\_size|-1} ob\_digit[i] \cdot 2^{\,SHIFT \cdot i}$
可以把 ob_size 理解为该整数占用的 digit 个数(其正负表示整数的符号):PyLong_SHIFT 为 30 时每个 digit 保存 30 位,所以 ob_size 为 1 时最多表示 30 位,ob_size 为 2 时最多表示 60 位,以此类推;位数不够时再增加 digit 即可,因此基本不存在溢出问题

3.1.2 常量池

scss 复制代码
/* Return the preallocated object for a small integer.
 *
 * ival must satisfy IS_SMALL_INT (asserted).  No new object is created:
 * the entry at index _PY_NSMALLNEGINTS + ival of the shared small-int
 * table is returned through Py_NewRef().
 */
static PyObject *
get_small_int(sdigit ival)
{
    assert(IS_SMALL_INT(ival));
    /* The table starts at the most negative cached value, hence the offset. */
    PyObject *v = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival];
    return Py_NewRef(v);
}

/* The small-int table is reached through the _Py_SINGLETON accessor. */
#define _PyLong_SMALL_INTS _Py_SINGLETON(small_ints)

/* Table bounds: 257 non-negative entries (0..256) and 5 negative
 * entries (-5..-1), per the range described below. */
#define _PY_NSMALLPOSINTS           257
#define _PY_NSMALLNEGINTS           5

/* Small integers are preallocated in this array so that they
         * can be shared.
         * The integers that are preallocated are those in the range
         * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
         */
PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];

3.1.3 加法操作

ini 复制代码
/* Digit geometry: each digit carries PyLong_SHIFT (30) bits.
 * PyLong_BASE is 2**PyLong_SHIFT and PyLong_MASK (BASE - 1) keeps the
 * low PyLong_SHIFT bits of a value. */
#define PyLong_BASE     ((digit)1 << PyLong_SHIFT)
#define PyLong_MASK     ((digit)(PyLong_BASE - 1))
#define PyLong_SHIFT    30

/* Add the magnitudes of two ints, digit by digit.
 *
 * Only the absolute values of the operands' sizes are used, so the signs
 * of a and b are ignored here.  Returns the normalized result, or NULL
 * when the result object cannot be allocated.
 */
static PyLongObject *
x_add(PyLongObject *a, PyLongObject *b)
{
    /* Digit counts of both operands (absolute value of the size field). */
    Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b));
    PyLongObject *z;
    Py_ssize_t i;
    digit carry = 0;

    /* Ensure a is the operand with more digits. */
    if (size_a < size_b) {
        { PyLongObject *temp = a; a = b; b = temp; }
        { Py_ssize_t size_temp = size_a;
            size_a = size_b;
            size_b = size_temp; }
    }
    /* One extra digit leaves room for a final carry. */
    z = _PyLong_New(size_a+1);
    if (z == NULL)
        return NULL;
    /* Add the digits both operands have in common. */
    for (i = 0; i < size_b; ++i) {
        carry += a->long_value.ob_digit[i] + b->long_value.ob_digit[i];
        /* Keep only the low PyLong_SHIFT bits as the result digit ... */
        z->long_value.ob_digit[i] = carry & PyLong_MASK;
        /* ... and move the overflow down to become the next carry. */
        carry >>= PyLong_SHIFT;
    }
    /* Propagate the carry through the remaining high digits of a. */
    for (; i < size_a; ++i) {
        carry += a->long_value.ob_digit[i];
        z->long_value.ob_digit[i] = carry & PyLong_MASK;
        carry >>= PyLong_SHIFT;
    }
    /* The last carry goes into the extra digit; long_normalize() drops
       it again if it is zero. */
    z->long_value.ob_digit[i] = carry;
    return long_normalize(z);
}

/* Strip leading zero digits from v.
 *
 * Walks down from the most significant digit while it is zero and, if
 * any digits were dropped, stores the reduced digit count back into the
 * size field, keeping the sign the size had before.  Returns v.
 */
static PyLongObject *
long_normalize(PyLongObject *v)
{
    const Py_ssize_t ndigits = Py_ABS(Py_SIZE(v));
    Py_ssize_t used = ndigits;

    for (; used > 0; --used) {
        if (v->long_value.ob_digit[used - 1] != 0) {
            break;
        }
    }

    if (used == ndigits) {
        /* Nothing to trim. */
        return v;
    }

    if (Py_SIZE(v) < 0) {
        Py_SET_SIZE(v, -(used));
    }
    else {
        Py_SET_SIZE(v, used);
    }
    return v;
}

long 类型值的计算
/* Convert vv to a C long, reporting overflow through *overflow.
 *
 * *overflow is set to 0 on entry.  If the value does not fit in a long,
 * *overflow becomes +1 or -1 (the sign of the value) and -1 is returned.
 * A NULL argument raises a bad-internal-call error and returns -1.
 * Objects that are not ints are first converted with _PyNumber_Index();
 * if that fails, -1 is returned.
 */
long
PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)
{
    PyLongObject *num;
    long result = -1;
    int owned = 0;  /* non-zero when _PyNumber_Index() gave us a reference */

    *overflow = 0;
    if (vv == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }

    if (!PyLong_Check(vv)) {
        num = (PyLongObject *)_PyNumber_Index(vv);
        if (num == NULL) {
            return -1;
        }
        owned = 1;
    }
    else {
        num = (PyLongObject *)vv;
    }

    Py_ssize_t ndigits = Py_SIZE(num);

    if (ndigits == 0) {
        result = 0;
    }
    else if (ndigits == 1) {
        /* Single digit: the value is the digit itself. */
        result = num->long_value.ob_digit[0];
    }
    else if (ndigits == -1) {
        result = -(sdigit)num->long_value.ob_digit[0];
    }
    else {
        int sign = 1;
        unsigned long acc = 0;
        int lost_bits = 0;

        if (ndigits < 0) {
            sign = -1;
            ndigits = -(ndigits);
        }

        /* Fold the digits in from most to least significant.  Shifting
         * back must reproduce the previous accumulator, otherwise high
         * bits fell off the end of the unsigned long. */
        while (ndigits-- > 0) {
            unsigned long before = acc;
            acc = (acc << PyLong_SHIFT) | num->long_value.ob_digit[ndigits];
            if ((acc >> PyLong_SHIFT) != before) {
                lost_bits = 1;
                break;
            }
        }

        if (lost_bits) {
            /* result stays at -1 */
            *overflow = sign;
        }
        else if (acc <= (unsigned long)LONG_MAX) {
            result = (long)acc * sign;
        }
        else if (sign < 0 && acc == PY_ABS_LONG_MIN) {
            /* The magnitude of LONG_MIN is only representable when negative. */
            result = LONG_MIN;
        }
        else {
            /* result stays at -1 */
            *overflow = sign;
        }
    }

    if (owned) {
        Py_DECREF(num);
    }
    return result;
}

3.2 list数据类型

3.2.1 list数据结构

csharp 复制代码
/* List object: a variable-size header plus a separately held vector of
 * element pointers and the capacity of that vector. */
typedef struct {
    /* Variable-size object header; its ob_size is the number of
     * elements currently in use (see the invariants below). */
    PyObject_VAR_HEAD
    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    PyObject **ob_item;

    /* ob_item contains space for 'allocated' elements.  The number
     * currently in use is ob_size.
     * Invariants:
     *     0 <= ob_size <= allocated
     *     len(list) == ob_size
     *     ob_item == NULL implies ob_size == allocated == 0
     * list.sort() temporarily sets allocated to -1 to detect mutations.
     *
     * Items must normally not be NULL, except during construction when
     * the list is not yet visible outside the function that builds it.
     */
    /* Capacity: how many elements ob_item has room for. */
    Py_ssize_t allocated;
} PyListObject;

3.2.2 插入操作

ini 复制代码
/* Insert v into self at position where.
 *
 * A negative where counts from the end of the list; positions that fall
 * outside the list are clamped to the front (0) or the back (n).
 * Returns 0 on success.  Returns -1 if v is NULL (after raising a
 * bad-internal-call error) or if the list cannot be resized.
 */
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
    /* n is the element count before the insertion. */
    Py_ssize_t i, n = Py_SIZE(self);
    PyObject **items;
    if (v == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }
    /* The grown size must stay below PY_SSIZE_T_MAX. */
    assert((size_t)n + 1 < PY_SSIZE_T_MAX);
    /* Make room for one more element (this also updates the size). */
    if (list_resize(self, n+1) < 0)
        return -1;
    if (where < 0) {
        /* Translate a negative index into an offset from the front. */
        where += n;
        /* Still negative: clamp to the first position. */
        if (where < 0)
            where = 0;
    }
    /* Past the end: insert after the last element. */
    if (where > n)
        where = n;
    items = self->ob_item;
    /* Shift the elements at and after where one slot to the right,
       starting from the last one. */
    for (i = n; --i >= where; )
        items[i+1] = items[i];
    /* Store v in the freed slot through Py_NewRef(). */
    items[where] = Py_NewRef(v);
    return 0;
}

/* Set the list's size to newsize, reallocating the item vector when the
 * current capacity cannot be reused.
 *
 * Returns 0 on success.  On failure (requested capacity too large, or
 * the reallocation returns NULL) raises a memory error and returns -1,
 * leaving the list's fields untouched.
 */
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    const Py_ssize_t capacity = self->allocated;

    /* Reuse the existing buffer while newsize lies between half of the
     * capacity and the capacity itself; anything smaller falls through
     * to the reallocation so the list can shrink. */
    if (newsize <= capacity && (capacity >> 1) <= newsize) {
        assert(self->ob_item != NULL || newsize == 0);
        Py_SET_SIZE(self, newsize);
        return 0;
    }

    size_t target;
    if (newsize == 0) {
        target = 0;
    }
    else {
        /* Mild proportional over-allocation (newsize/8 plus padding),
         * rounded down to a multiple of 4.
         * The growth pattern is:  0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
         * The sum cannot overflow: its largest possible value is
         * PY_SSIZE_T_MAX * (9 / 8) + 6, which always fits in a size_t. */
        target = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;

        /* Skip the over-allocation when the requested growth already
         * exceeds the slack it would add. */
        if (newsize - Py_SIZE(self) > (Py_ssize_t)(target - newsize)) {
            target = ((size_t)newsize + 3) & ~(size_t)3;
        }
    }

    PyObject **buffer = NULL;
    if (target <= (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        size_t nbytes = target * sizeof(PyObject *);
        buffer = (PyObject **)PyMem_Realloc(self->ob_item, nbytes);
    }
    /* else: the byte count would overflow, so buffer stays NULL */

    if (buffer == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    self->ob_item = buffer;
    Py_SET_SIZE(self, newsize);
    self->allocated = target;
    return 0;
}

参考:深入理解 Python 虚拟机:整型(int)的实现原理及源码剖析 - 知乎 (zhihu.com)

参考链接:docs.python.org/zh-cn/3/

相关推荐
梧桐树04292 小时前
python常用内建模块:collections
python
Dream_Snowar2 小时前
速通Python 第三节
开发语言·python
蓝天星空4 小时前
Python调用open ai接口
人工智能·python
jasmine s4 小时前
Pandas
开发语言·python
郭wes代码4 小时前
Cmd命令大全(万字详细版)
python·算法·小程序
leaf_leaves_leaf4 小时前
win11用一条命令给anaconda环境安装GPU版本pytorch,并检查是否为GPU版本
人工智能·pytorch·python
夜雨飘零14 小时前
基于Pytorch实现的说话人日志(说话人分离)
人工智能·pytorch·python·声纹识别·说话人分离·说话人日志
404NooFound4 小时前
Python轻量级NoSQL数据库TinyDB
开发语言·python·nosql
天天要nx4 小时前
D102【python 接口自动化学习】- pytest进阶之fixture用法
python·pytest
minstbe4 小时前
AI开发:使用支持向量机(SVM)进行文本情感分析训练 - Python
人工智能·python·支持向量机