一、搭建源码环境
- 进入 GitHub 地址:github.com/python/cpython ,克隆出自己需要阅读的 Python 版本(分支)
- 软件要求:visual studio
- 双击打开克隆路径下的PCbuild/pcbuild.sln
- 打开界面如下:主要的源码部分在pythoncore目录
二、python 对象数据结构
2.1 python 一切都是对象
今天涉及到的对象为:PyObject(所有对象共有的定长头部)、PyTypeObject(类型对象)、PyVarObject(变长对象的头部,比 PyObject 多一个 ob_size 字段)
2.2 PyObject 数据结构
PyObject的数据结构如下:
c
struct _object {
_PyObject_HEAD_EXTRA # 对象链
Py_ssize_t ob_refcnt; # 引用的数量
PyTypeObject *ob_type; # 对象的类型
};
/*
 * Extra header fields that chain an object to its neighbours.
 * NOTE(review): upstream CPython presumably only defines these fields in
 * Py_TRACE_REFS debug builds and leaves the macro empty otherwise --
 * confirm against Include/object.h for the version being read.
 */
#define _PyObject_HEAD_EXTRA                                        \
    PyObject *_ob_next;     /* address of the next object */        \
    PyObject *_ob_prev;     /* address of the previous object */
/*
 * Signed integer type used for reference counts and item counts.
 * NOTE(review): the `_W64 int` spelling presumably comes from the 32-bit
 * Windows configuration; other platforms define Py_ssize_t differently --
 * confirm against pyport.h / PC/pyconfig.h.
 */
typedef _W64 int Py_ssize_t;
PyTypeObject(struct _typeobject)可以理解为"类型对象":其中的 tp_* 字段保存该类型各种操作对应的函数指针和方法表,具体类型会填入自己的实现。
/*
 * Type object layout: a variable-size object header followed by the type's
 * name, allocation sizes, and the slots (function pointers and method
 * tables) for the operations the type supports.
 */
struct _typeobject {
PyObject_VAR_HEAD
const char *tp_name; /* For printing, in format "<module>.<name>" */
Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
/* Methods to implement standard operations */
destructor tp_dealloc;
Py_ssize_t tp_vectorcall_offset;
getattrfunc tp_getattr;
setattrfunc tp_setattr;
PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
or tp_reserved (Python 3) */
reprfunc tp_repr;
/* Method suites for standard classes */
PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;
/* More standard operations (here for binary compatibility) */
hashfunc tp_hash;
ternaryfunc tp_call;
reprfunc tp_str;
getattrofunc tp_getattro;
setattrofunc tp_setattro;
/* Functions to access object as input/output buffer */
PyBufferProcs *tp_as_buffer;
/* Flags to define presence of optional/expanded features */
unsigned long tp_flags;
const char *tp_doc; /* Documentation string */
/* Assigned meaning in release 2.0 */
/* call function for all accessible objects */
traverseproc tp_traverse;
/* delete references to contained objects */
inquiry tp_clear;
/* Assigned meaning in release 2.1 */
/* rich comparisons */
richcmpfunc tp_richcompare;
/* weak reference enabler */
Py_ssize_t tp_weaklistoffset;
/* Iterators */
getiterfunc tp_iter;
iternextfunc tp_iternext;
/* Attribute descriptor and subclassing stuff */
PyMethodDef *tp_methods;
PyMemberDef *tp_members;
PyGetSetDef *tp_getset;
// Strong reference on a heap type, borrowed reference on a static type
PyTypeObject *tp_base;
PyObject *tp_dict;
descrgetfunc tp_descr_get;
descrsetfunc tp_descr_set;
Py_ssize_t tp_dictoffset;
initproc tp_init;
allocfunc tp_alloc;
newfunc tp_new;
freefunc tp_free; /* Low-level free-memory routine */
inquiry tp_is_gc; /* For PyObject_IS_GC */
PyObject *tp_bases;
PyObject *tp_mro; /* method resolution order */
PyObject *tp_cache; /* no longer used */
void *tp_subclasses; /* for static builtin types this is an index */
PyObject *tp_weaklist; /* not used for static builtin types */
destructor tp_del;
/* Type attribute cache version tag. Added in version 2.6 */
unsigned int tp_version_tag;
destructor tp_finalize;
vectorcallfunc tp_vectorcall;
/* bitset of which type-watchers care about this type */
char tp_watched;
};
2.3 PyVarObject 对象
c
/*
 * Header for variable-size objects: the plain object header followed by
 * the number of items stored in the variable part.
 */
typedef struct {
PyObject ob_base;
Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;
相对于 PyObject 来说,PyVarObject 多了一个 ob_size 字段,用于记录可变长部分的元素个数。
三、 基本数据结构
3.1 long 类型
3.1.1 数据结构
c
/*
 * Integer object: the PyObject_HEAD header macro (defined elsewhere)
 * followed by the integer's value.
 */
struct _longobject {
    PyObject_HEAD
    _PyLongValue long_value;    /* the value: digit count plus digit array */
};
/*
 * Value part of an integer object: a digit count and the digits themselves.
 */
typedef struct _PyLongValue {
Py_ssize_t ob_size; /* Number of items in variable part */
/* Declared with one element; presumably over-allocated to hold
 * abs(ob_size) digits -- confirm in _PyLong_New. */
digit ob_digit[1];
} _PyLongValue;
计算公式 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
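理解:ob_size 的绝对值表示使用了多少个 digit,每个 digit 保存 PyLong_SHIFT(本文中为 30)位有效数据:ob_size 为 1 时最多可表示 30 位,ob_size 为 2 时最多可表示 60 位;ob_size 的符号即整数的符号。digit 的个数可以按需增长,因此基本不存在定长整数那样的溢出问题。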
3.1.2 常量池
c
/*
 * Return a new reference to the preallocated object for the small
 * integer `ival`.
 *
 * The caller must pass a value for which IS_SMALL_INT(ival) holds; this is
 * only checked by an assert.
 */
static PyObject *
get_small_int(sdigit ival)
{
    assert(IS_SMALL_INT(ival));
    /* Offset the index by _PY_NSMALLNEGINTS so that negative values of
     * ival map to non-negative array positions. */
    PyObject *v = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival];
    return Py_NewRef(v);
}
/* The small-integer array, reached through the _Py_SINGLETON() macro. */
#define _PyLong_SMALL_INTS _Py_SINGLETON(small_ints)
/* Count of preallocated non-negative integers: 0 .. 256. */
#define _PY_NSMALLPOSINTS 257
/* Count of preallocated negative integers: -5 .. -1. */
#define _PY_NSMALLNEGINTS 5
/* Small integers are preallocated in this array so that they
 * can be shared.
 * The integers that are preallocated are those in the range
 * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
 */
PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
3.1.3 加法操作
c
/* PyLong_BASE is 2**PyLong_SHIFT. */
#define PyLong_BASE ((digit)1 << PyLong_SHIFT)
/* PyLong_MASK keeps the low PyLong_SHIFT bits of a value. */
#define PyLong_MASK ((digit)(PyLong_BASE - 1))
/* Shift width used by PyLong_BASE and PyLong_MASK above. */
#define PyLong_SHIFT 30
/*
 * Add the absolute values of two integers.
 *
 * Returns a new integer object holding |a| + |b| (normalized), or NULL if
 * the result object could not be allocated.
 */
static PyLongObject *
x_add(PyLongObject *a, PyLongObject *b)
{
    /* Work with digit counts; the sign carried by the size is ignored. */
    Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b));
    PyLongObject *z;
    Py_ssize_t i;
    digit carry = 0;

    /* Ensure a is the operand with more digits. */
    if (size_a < size_b) {
        { PyLongObject *temp = a; a = b; b = temp; }
        { Py_ssize_t size_temp = size_a;
          size_a = size_b;
          size_b = size_temp; }
    }

    /* One extra digit leaves room for a final carry. */
    z = _PyLong_New(size_a+1);
    if (z == NULL)
        return NULL;

    /* Add the digits both operands have, lowest digit first. */
    for (i = 0; i < size_b; ++i) {
        carry += a->long_value.ob_digit[i] + b->long_value.ob_digit[i];
        /* Keep only the low PyLong_SHIFT bits in the result digit... */
        z->long_value.ob_digit[i] = carry & PyLong_MASK;
        /* ...and carry the remaining high bits into the next digit. */
        carry >>= PyLong_SHIFT;
    }

    /* Propagate the carry through the remaining high digits of a. */
    for (; i < size_a; ++i) {
        carry += a->long_value.ob_digit[i];
        z->long_value.ob_digit[i] = carry & PyLong_MASK;
        carry >>= PyLong_SHIFT;
    }
    z->long_value.ob_digit[i] = carry;

    /* Drop the extra top digit again if it ended up zero. */
    return long_normalize(z);
}
/*
 * Strip zero digits from the most-significant end of v, in place.
 *
 * The size is only rewritten when at least one digit was dropped, and it
 * keeps its original sign.  Returns v.
 */
static PyLongObject *
long_normalize(PyLongObject *v)
{
    const Py_ssize_t ndigits = Py_ABS(Py_SIZE(v));
    Py_ssize_t used = ndigits;

    /* Walk down from the top digit until a non-zero digit is found. */
    while (used > 0 && v->long_value.ob_digit[used - 1] == 0) {
        used--;
    }

    if (used != ndigits) {
        Py_SET_SIZE(v, (Py_SIZE(v) < 0) ? -(used) : used);
    }
    return v;
}
long 类型值的计算
/*
 * Convert vv to a C long.
 *
 * *overflow is set to 0 on entry.  If the value does not fit in a long,
 * *overflow is set to +1 or -1 (the sign of the value) and -1 is returned.
 * If vv is NULL, or vv is not an int and _PyNumber_Index() fails, -1 is
 * returned and *overflow stays 0.
 */
long
PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)
{
/* This version by Tim Peters */
PyLongObject *v;
unsigned long x, prev;
long res;
Py_ssize_t i;
int sign;
int do_decref = 0; /* if PyNumber_Index was called */
*overflow = 0;
if (vv == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (PyLong_Check(vv)) {
v = (PyLongObject *)vv;
}
else {
/* Not an int: convert it first; the converted object is released at exit. */
v = (PyLongObject *)_PyNumber_Index(vv);
if (v == NULL)
return -1;
do_decref = 1;
}
res = -1;
i = Py_SIZE(v);
/* Sizes -1, 0 and 1 are handled directly from the single digit. */
switch (i) {
case -1:
res = -(sdigit)v->long_value.ob_digit[0];
break;
case 0:
res = 0;
break;
case 1:
res = v->long_value.ob_digit[0];
break;
default:
sign = 1;
x = 0;
if (i < 0) {
sign = -1;
i = -(i);
}
/* Accumulate digits from the highest index down.  If shifting the
 * new value back does not give the previous value, bits were lost. */
while (--i >= 0) {
prev = x;
x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i];
if ((x >> PyLong_SHIFT) != prev) {
*overflow = sign;
goto exit;
}
}
/* Haven't lost any bits, but casting to long requires extra
 * care (see comment above).
 */
if (x <= (unsigned long)LONG_MAX) {
res = (long)x * sign;
}
else if (sign < 0 && x == PY_ABS_LONG_MIN) {
res = LONG_MIN;
}
else {
*overflow = sign;
/* res is already set to -1 */
}
}
exit:
if (do_decref) {
Py_DECREF(v);
}
return res;
}
3.2 list数据类型
3.2.1 list数据结构
c
/*
 * List object: a variable-size object header, a pointer to the array of
 * element pointers, and the capacity of that array.
 */
typedef struct {
    /* Header macro (defined elsewhere); supplies the ob_size referred to
     * in the invariants below. */
    PyObject_VAR_HEAD
    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    PyObject **ob_item;

    /* ob_item contains space for 'allocated' elements.  The number
     * currently in use is ob_size.
     * Invariants:
     *     0 <= ob_size <= allocated
     *     len(list) == ob_size
     *     ob_item == NULL implies ob_size == allocated == 0
     * list.sort() temporarily sets allocated to -1 to detect mutations.
     *
     * Items must normally not be NULL, except during construction when
     * the list is not yet visible outside the function that builds it.
     */
    Py_ssize_t allocated;       /* number of element slots in ob_item */
} PyListObject;
3.2.2 插入操作
c
/*
 * Insert v into self at index `where`, moving later items up by one slot.
 *
 * A negative `where` counts from the end of the list; positions outside
 * the list are clamped to the first or last position.  A new reference to
 * v is stored.  Returns 0 on success, or -1 if v is NULL or the list could
 * not be resized.
 */
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
    /* n is the number of items before the insertion. */
    Py_ssize_t i, n = Py_SIZE(self);
    PyObject **items;
    if (v == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }

    /* n + 1 must stay below PY_SSIZE_T_MAX. */
    assert((size_t)n + 1 < PY_SSIZE_T_MAX);
    /* Make room for one more item; list_resize() also sets the size. */
    if (list_resize(self, n+1) < 0) {
        return -1;
    }

    if (where < 0) {
        /* Turn a negative index into an offset from the start: n - |where|. */
        where += n;
        if (where < 0) {
            /* Still negative: insert at the front. */
            where = 0;
        }
    }
    /* Past the end: insert after the last item. */
    if (where > n) {
        where = n;
    }

    items = self->ob_item;
    /* Move items[where .. n-1] up one slot, starting from the end. */
    for (i = n; --i >= where; ) {
        items[i+1] = items[i];
    }
    /* Store v in the freed slot, taking a new reference to it. */
    items[where] = Py_NewRef(v);
    return 0;
}
/*
 * Set the size of self to newsize, reallocating the item array if needed.
 *
 * Returns 0 on success.  On failure (allocation error or a size that would
 * overflow) raises MemoryError via PyErr_NoMemory() and returns -1, leaving
 * the list unchanged.
 */
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    const Py_ssize_t capacity = self->allocated;

    /* Fast path: the current buffer is big enough and the list is not
     * shrinking below half of it, so only the size needs updating.
     */
    if (newsize <= capacity && (capacity >> 1) <= newsize) {
        assert(self->ob_item != NULL || newsize == 0);
        Py_SET_SIZE(self, newsize);
        return 0;
    }

    /* Grow by roughly one eighth plus a small constant, rounded down to a
     * multiple of 4, so that a long run of appends stays linear-time
     * overall even with a slow system realloc().
     * The growth pattern is: 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
     * new_allocated cannot overflow: its largest possible value is
     * PY_SSIZE_T_MAX * (9 / 8) + 6, which always fits in a size_t.
     */
    size_t new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;

    /* Skip the over-allocation when the requested size is closer to the
     * over-allocated size than to the old size.
     */
    if (newsize - Py_SIZE(self) > (Py_ssize_t)(new_allocated - newsize)) {
        new_allocated = ((size_t)newsize + 3) & ~(size_t)3;
    }
    if (newsize == 0) {
        new_allocated = 0;
    }

    /* items stays NULL when the byte count would overflow. */
    PyObject **items = NULL;
    if (new_allocated <= (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        size_t num_allocated_bytes = new_allocated * sizeof(PyObject *);
        items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
    }
    if (items == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    self->ob_item = items;
    Py_SET_SIZE(self, newsize);
    self->allocated = new_allocated;
    return 0;
}