#11 Python 源码学习 11: PyFrameObject

2021-09-25
// Include/cpython/frameobject.h
/* Layout of a frame object.
   A frame links back to the previous frame through f_back and carries the
   code object together with the builtin, global and local symbol tables.
   f_localsplus is the trailing, dynamically sized storage shared by the
   locals and the value stack; f_valuestack points after the last local. */
struct _frame {
    PyObject_VAR_HEAD
    struct _frame *f_back;      /* previous frame, or NULL */
    PyCodeObject *f_code;       /* code segment */
    PyObject *f_builtins;       /* builtin symbol table (PyDictObject) */
    PyObject *f_globals;        /* global symbol table (PyDictObject) */
    PyObject *f_locals;         /* local symbol table (any mapping) */
    PyObject **f_valuestack;    /* points after the last local */
    /* Next free slot in f_valuestack.  Frame creation sets to f_valuestack.
       Frame evaluation usually NULLs it, but a frame that yields sets it
       to the current stack top. */
    PyObject **f_stacktop;
    PyObject *f_trace;          /* Trace function */
    char f_trace_lines;         /* Emit per-line trace events? */
    char f_trace_opcodes;       /* Emit per-opcode trace events? */

    /* Borrowed reference to a generator, or NULL */
    PyObject *f_gen;

    int f_lasti;                /* Last instruction if called */
    /* Call PyFrame_GetLineNumber() instead of reading this field
       directly.  As of 2.3 f_lineno is only valid when tracing is
       active (i.e. when f_trace is set).  At other times we use
       PyCode_Addr2Line to calculate the line from the current
       bytecode index. */
    int f_lineno;               /* Current line number */
    int f_iblock;               /* index in f_blockstack */
    char f_executing;           /* whether the frame is still executing */
    PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
    PyObject *f_localsplus[1];  /* locals+stack, dynamically sized */
};

// Include/pyframe.h
typedef struct _frame PyFrameObject;

字节码

def sum(a, b):  # NOTE: this name shadows the builtin sum() within the demo
    return a + b  # LOAD_FAST a, LOAD_FAST b, BINARY_ADD, RETURN_VALUE in the listing below

def test():
    print('hello world')
    print(sum(1, 2))  # sum(1, 2) is called first (CALL_FUNCTION 2), then print (CALL_FUNCTION 1)

import dis
dis.dis(sum)  # disassemble both functions; the combined output is listed below
dis.dis(test)
2           0 LOAD_FAST                0 (a)
            2 LOAD_FAST                1 (b)
            4 BINARY_ADD
            6 RETURN_VALUE
5           0 LOAD_GLOBAL              0 (print)
            2 LOAD_CONST               1 ('hello world')
            4 CALL_FUNCTION            1
            6 POP_TOP

6           8 LOAD_GLOBAL              0 (print)
           10 LOAD_GLOBAL              1 (sum)
           12 LOAD_CONST               2 (1)
           14 LOAD_CONST               3 (2)
           16 CALL_FUNCTION            2
           18 CALL_FUNCTION            1
           20 POP_TOP
           22 LOAD_CONST               0 (None)
           24 RETURN_VALUE

行号,指令偏移,指令,参数,参数值(参考)

查看字节码:

sum.__code__.co_code
sum.__code__.co_varnames
sum.__code__.co_consts
sum.__code__.co_names

参考资料与拓展阅读

#10 Python 源码学习 09: set

2021-09-12

我都有点怀疑我这个学习计划是否是正确的。

同样的废话,就懒得说了,只说有价值的信息。

  1. 疑问:代码中的 clinic 相关文件不知道是做什么用的。
  2. set 和 frozenset 的定义在一起。不是这次看代码,我简直忘了还有 frozenset 这个类型的存在。

汇总了一下,所有的类型:

  1. PyAsyncGen_Type
  2. PyBaseObject_Type
  3. PyBlake2_BLAKE2bType
  4. PyBlake2_BLAKE2sType
  5. PyBool_Type
  6. PyBufferedIOBase_Type
  7. PyBufferedRandom_Type
  8. PyBufferedReader_Type
  9. PyBufferedRWPair_Type
  10. PyBufferedWriter_Type
  11. PyByteArrayIter_Type
  12. PyByteArray_Type
  13. PyBytesIO_Type
  14. PyBytesIter_Type
  15. PyBytes_Type
  16. PyCallIter_Type
  17. PyCapsule_Type
  18. PyCArg_Type
  19. PyCArray_Type
  20. PyCArrayType_Type
  21. PyCData_Type
  22. PyCell_Type
  23. PyCField_Type
  24. PyCFuncPtr_Type
  25. PyCFuncPtrType_Type
  26. PyCFunction_Type
  27. PyClassMethodDescr_Type
  28. PyClassMethod_Type
  29. PyCMethod_Type
  30. PyCode_Type
  31. PyComplex_Type
  32. PyContextTokenMissing_Type
  33. PyContextToken_Type
  34. PyContext_Type
  35. PyContextVar_Type
  36. PyCoro_Type
  37. PyCPointer_Type
  38. PyCPointerType_Type
  39. PyCSimpleType_Type
  40. PyCStgDict_Type
  41. PyCStructType_Type
  42. PyCThunk_Type
  43. PyCursesWindow_Type
  44. PyCursesWindow_Type;
  45. PyDictItems_Type
  46. PyDictIterItem_Type
  47. PyDictIterKey_Type
  48. PyDictIterValue_Type
  49. PyDictKeys_Type
  50. PyDictProxy_Type
  51. PyDictRevIterItem_Type
  52. PyDictRevIterKey_Type
  53. PyDictRevIterValue_Type
  54. PyDict_Type
  55. PyDictValues_Type
  56. PyEllipsis_Type
  57. PyEnum_Type
  58. PyFileIO_Type
  59. PyFileIO_Type;
  60. PyFilter_Type
  61. PyFloat_Type
  62. PyFrame_Type
  63. PyFrozenSet_Type
  64. PyFunction_Type
  65. Py_GenericAliasType
  66. PyGen_Type
  67. PyGetSetDescr_Type
  68. PyHKEY_Type
  69. PyIncrementalNewlineDecoder_Type
  70. PyInstanceMethod_Type
  71. PyIOBase_Type
  72. PyListIter_Type
  73. PyListRevIter_Type
  74. PyList_Type
  75. PyLongRangeIter_Type
  76. PyLong_Type
  77. PyMap_Type
  78. PyMemberDescr_Type
  79. PyMemoryView_Type
  80. PyMethodDescr_Type
  81. PyMethod_Type
  82. PyModuleDef_Type
  83. PyModule_Type
  84. PyODictItems_Type
  85. PyODictIter_Type
  86. PyODictKeys_Type
  87. PyODict_Type
  88. PyODictValues_Type
  89. PyPickleBuffer_Type
  90. PyProperty_Type
  91. PyRangeIter_Type
  92. PyRange_Type
  93. PyRawIOBase_Type
  94. PyReversed_Type
  95. PySeqIter_Type
  96. PySetIter_Type
  97. PySet_Type
  98. PySlice_Type
  99. PyStaticMethod_Type
  100. PyStdPrinter_Type
  101. PySTEntry_Type
  102. PyStringIO_Type
  103. PyST_Type
  104. PySuper_Type
  105. PyTextIOBase_Type
  106. PyTextIOWrapper_Type
  107. PyTraceBack_Type
  108. PyTupleIter_Type
  109. PyTuple_Type
  110. PyType_Type
  111. PyUnicodeIter_Type
  112. PyUnicode_Type
  113. PyWindowsConsoleIO_Type
  114. PyWindowsConsoleIO_Type;
  115. PyWrapperDescr_Type
  116. PyZip_Type

#9 Python 源码学习 08: dict

2021-09-02

类型定义

INIT_TYPE(&PyDict_Type, "dict");
SETBUILTIN("dict", &PyDict_Type);

PyTypeObject PyDict_Type 的定义在 Objects/dictobject.c 中。

Include/cpython/dictobject.h

/* Instance layout of a dict object: an item count, a version tag, the
   keys table, and an optional separate values array. */
typedef struct {
    PyObject_HEAD

    /* Number of items in the dictionary */
    Py_ssize_t ma_used;

    /* Dictionary version: globally unique, value change each time
       the dictionary is modified */
    uint64_t ma_version_tag;

    PyDictKeysObject *ma_keys;

    /* If ma_values is NULL, the table is "combined": keys and values
       are stored in ma_keys.

       If ma_values is not NULL, the table is split:
       keys are stored in ma_keys and values are stored in ma_values */
    PyObject **ma_values;
} PyDictObject;

成员方法

setdefault

/* Method-table entry that binds the name "setdefault" to dict_setdefault,
   flagged METH_FASTCALL, with dict_setdefault__doc__ as its docstring. */
#define DICT_SETDEFAULT_METHODDEF    \
    {"setdefault", (PyCFunction)(void(*)(void))dict_setdefault, METH_FASTCALL, dict_setdefault__doc__},

/* Argument-unpacking wrapper behind dict.setdefault.
   Takes the key from args[0] and an optional default from args[1]
   (Py_None when only one argument is given), then forwards both to
   dict_setdefault_impl.  Returns that call's result, or NULL when the
   positional-argument check fails. */
static PyObject *
dict_setdefault(PyDictObject *self, PyObject *const *args, Py_ssize_t nargs)
{
    PyObject *return_value = NULL;
    PyObject *key;
    PyObject *default_value = Py_None;

    /* Argument-count check (min 1, max 2); on failure return_value stays NULL. */
    if (!_PyArg_CheckPositional("setdefault", nargs, 1, 2)) {
        goto exit;
    }
    key = args[0];
    if (nargs < 2) {
        /* No explicit default supplied: keep Py_None. */
        goto skip_optional;
    }
    default_value = args[1];
skip_optional:
    return_value = dict_setdefault_impl(self, key, default_value);

exit:
    return return_value;
}

/* Implementation of dict.setdefault: delegates to PyDict_SetDefault and
   returns its result after taking a reference to it. */
static PyObject *
dict_setdefault_impl(PyDictObject *self, PyObject *key,
                     PyObject *default_value)
/*[clinic end generated code: output=f8c1101ebf69e220 input=0f063756e815fd9d]*/
{
    PyObject *val;

    val = PyDict_SetDefault((PyObject *)self, key, default_value);
    /* The X variant is used, so val may presumably be NULL here (error
       case); PyDict_SetDefault is documented as returning a borrowed
       reference, hence the extra reference for the caller. */
    Py_XINCREF(val);
    return val;
}

#8 Python 源码学习 07: list

2021-08-19

cpython/Include/cpython/listobject.h

INIT_TYPE(&PyList_Type, "list");
SETBUILTIN("list", &PyList_Type);

PyTypeObject PyList_Type 的定义在 Objects/listobject.c 中。

成员方法

相关的方法在 #define LIST_.+_METHODDEF 的定义中,比如 append 方法:

/* Method-table entry that binds the name "append" to list_append,
   flagged METH_O, with list_append__doc__ as its docstring. */
#define LIST_APPEND_METHODDEF    \
    {"append", (PyCFunction)list_append, METH_O, list_append__doc__},

/* Implementation of list.append: hands the object to app1 and returns
   None when app1 reports success (0), or NULL otherwise. */
static PyObject *
list_append(PyListObject *self, PyObject *object)
/*[clinic end generated code: output=7c096003a29c0eae input=43a3fe48a7066e91]*/
{
    if (app1(self, object) == 0)
        Py_RETURN_NONE;
    return NULL;
}

/* Append v at the end of self.
   Returns 0 on success.  Returns -1 when the list already holds
   PY_SSIZE_T_MAX items (OverflowError is set here) or when list_resize
   reports failure. */
static int
app1(PyListObject *self, PyObject *v)
{
    Py_ssize_t n = PyList_GET_SIZE(self);

    assert (v != NULL);
    /* n + 1 below would not fit once the size is already at the maximum. */
    if (n == PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError,
            "cannot add more objects to list");
        return -1;
    }

    /* Grow the list by one slot before storing anything. */
    if (list_resize(self, n+1) < 0)
        return -1;

    /* Take a reference to v, then store it at index n (the old size). */
    Py_INCREF(v);
    PyList_SET_ITEM(self, n, v);
    return 0;
}

#7 Python 源码学习 06: tuple

2021-08-05

源码

INIT_TYPE(&PyTuple_Type, "tuple");
SETBUILTIN("tuple", &PyTuple_Type);

**cpython/Include/cpython/tupleobject.h**

/* Instance layout of a tuple: the variable-size object header followed
   by the inline array of item pointers. */
typedef struct {
    PyObject_VAR_HEAD
    /* ob_item contains space for 'ob_size' elements.
       Items must normally not be NULL, except during construction when
       the tuple is not yet visible outside the function that builds it. */
    PyObject *ob_item[1];
} PyTupleObject;

当然,与之对应的 PyTypeObject PyTuple_Type 定义在 Objects/tupleobject.c,就不贴出来了。

PyTuple_SET_ITEM 似乎是在完成内存初始化的空间内填充元素时使用的。

成员方法

tuple 类型只有两个普通成员方法:count, index(方法表中另外还有 __getnewargs__ 和 __class_getitem__ 两项)

/* Method-table entry that binds the name "index" to tuple_index,
   flagged METH_FASTCALL. */
#define TUPLE_INDEX_METHODDEF    \
    {"index", (PyCFunction)(void(*)(void))tuple_index, METH_FASTCALL, tuple_index__doc__},

/* Method-table entry that binds the name "count" to tuple_count,
   flagged METH_O. */
#define TUPLE_COUNT_METHODDEF    \
    {"count", (PyCFunction)tuple_count, METH_O, tuple_count__doc__},

/* Method table for tuple.  The first three rows come from *_METHODDEF
   macros (TUPLE___GETNEWARGS___METHODDEF is presumably the __getnewargs__
   entry; its definition is not shown here), followed by an explicit
   __class_getitem__ row and the terminating sentinel. */
static PyMethodDef tuple_methods[] = {
    TUPLE___GETNEWARGS___METHODDEF
    TUPLE_INDEX_METHODDEF
    TUPLE_COUNT_METHODDEF
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
    {NULL,              NULL}           /* sentinel */
};

引用计数

Py_INCREF
Py_DECREF
Py_XINCREF
Py_XDECREF

#6 Python 源码学习 05: str / bytes

2021-07-28

有了 int 的一点点经验,先找:

SETBUILTIN("bytearray",             &PyByteArray_Type);
SETBUILTIN("bytes",                 &PyBytes_Type);
SETBUILTIN("str",                   &PyUnicode_Type);

INIT_TYPE(&PyByteArray_Type, "bytearray");
INIT_TYPE(&PyBytes_Type, "str");
INIT_TYPE(&PyUnicode_Type, "str");

PS: 而且还可以通过 bytes 类型的 __doc__ 内容在 cpython 源码中搜索。
PS: 这里有一个奇怪的地方,就是 INIT_TYPE 的时候,把 PyBytes_Type 给了 str

PyBytes_Type

Objects/bytesobject.c

/* Type object for bytes.  Slots are filled positionally; the first three
   entries are labelled here the same way as in the PyUnicode_Type and
   PyLong_Type tables. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
/* Instance layout of a bytes object: the variable-size object header, a
   cached hash, and the inline character buffer. */
typedef struct {
    PyObject_VAR_HEAD
    Py_hash_t ob_shash;
    char ob_sval[1];

    /* Invariants:
     *     ob_sval contains space for 'ob_size+1' elements.
     *     ob_sval[ob_size] == 0.
     *     ob_shash is the hash of the string or -1 if not computed yet.
     */
} PyBytesObject;

我不知道这个 PyBytesObject 是个啥。
回头找 int 相关信息,果然找到 typedef struct _longobject PyLongObject;
就在网上搜索,结果找到 python 官网(Bytes Objects)有相关信息:

PyBytesObject

This subtype of PyObject represents a Python bytes object.

PyTypeObject PyBytes_Type

This instance of PyTypeObject represents the Python bytes type; it is the same object as bytes in the Python layer.

PyUnicode_Type

Objects/unicodeobject.c

/* Type object for str.  In this table tp_itemsize and tp_as_buffer are
   both 0, and tp_basicsize is sizeof(PyUnicodeObject). */
PyTypeObject PyUnicode_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                        /* tp_name */
    sizeof(PyUnicodeObject),      /* tp_basicsize */
    0,                            /* tp_itemsize */
    /* Slots */
    (destructor)unicode_dealloc,  /* tp_dealloc */
    0,                            /* tp_vectorcall_offset */
    0,                            /* tp_getattr */
    0,                            /* tp_setattr */
    0,                            /* tp_as_async */
    unicode_repr,                 /* tp_repr */
    &unicode_as_number,           /* tp_as_number */
    &unicode_as_sequence,         /* tp_as_sequence */
    &unicode_as_mapping,          /* tp_as_mapping */
    (hashfunc) unicode_hash,      /* tp_hash*/
    0,                            /* tp_call*/
    (reprfunc) unicode_str,       /* tp_str */
    PyObject_GenericGetAttr,      /* tp_getattro */
    0,                            /* tp_setattro */
    0,                            /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_UNICODE_SUBCLASS,   /* tp_flags */
    unicode_doc,                  /* tp_doc */
    0,                            /* tp_traverse */
    0,                            /* tp_clear */
    PyUnicode_RichCompare,        /* tp_richcompare */
    0,                            /* tp_weaklistoffset */
    unicode_iter,                 /* tp_iter */
    0,                            /* tp_iternext */
    unicode_methods,              /* tp_methods */
    0,                            /* tp_members */
    0,                            /* tp_getset */
    &PyBaseObject_Type,           /* tp_base */
    0,                            /* tp_dict */
    0,                            /* tp_descr_get */
    0,                            /* tp_descr_set */
    0,                            /* tp_dictoffset */
    0,                            /* tp_init */
    0,                            /* tp_alloc */
    unicode_new,                  /* tp_new */
    PyObject_Del,                 /* tp_free */
};

字符串格式化

我想找 % 格式化,{} 格式化,以及最新的 fstring 的实现方式,但是代码的复杂性,让我在预计的时间之内无法完成,只好到此打住。

可能得从 parse.c, ast.c 语法树开始,再到 parse_string.c

// Objects/unicodeobject.c
// PyMethodDef unicode_methods 中的几行:
{"format", (PyCFunction)(void(*)(void)) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
{"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__},
UNICODE___FORMAT___METHODDEF

// Objects/clinic/unicodeobject.c.h
/* Method-table entry that binds the name "__format__" to
   unicode___format__, flagged METH_O. */
#define UNICODE___FORMAT___METHODDEF    \
    {"__format__", (PyCFunction)unicode___format__, METH_O, unicode___format____doc__},

/* Argument-checking wrapper behind str.__format__.
   Requires arg to pass PyUnicode_Check and PyUnicode_READY, then passes it
   as format_spec to unicode___format___impl.  Returns NULL if either check
   fails, otherwise whatever the impl returns. */
static PyObject *
unicode___format__(PyObject *self, PyObject *arg)
{
    PyObject *return_value = NULL;
    PyObject *format_spec;

    /* Non-str argument: report it via _PyArg_BadArgument and bail out. */
    if (!PyUnicode_Check(arg)) {
        _PyArg_BadArgument("__format__", "argument", "str", arg);
        goto exit;
    }
    if (PyUnicode_READY(arg) == -1) {
        goto exit;
    }
    format_spec = arg;
    return_value = unicode___format___impl(self, format_spec);

exit:
    return return_value;
}
/* Entry point registered for the "format" method.
   Wraps the whole of self in a SubString, initialises the auto-numbering
   state, and hands everything to build_string with a nesting limit of 2.
   Returns NULL if PyUnicode_READY(self) fails. */
static PyObject *
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
{
    SubString input;

    /* PEP 3101 says only 2 levels, so that
       "{0:{1}}".format('abc', 's')            # works
       "{0:{1:{2}}}".format('abc', 's', '')    # fails
    */
    int recursion_depth = 2;

    AutoNumber auto_number;

    if (PyUnicode_READY(self) == -1)
        return NULL;

    AutoNumber_Init(&auto_number);
    /* The input substring spans self from index 0 to its full length. */
    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
}

/* Entry point registered for the "format_map" method: reuses
   do_string_format with NULL for args and obj in the kwargs position. */
static PyObject *
do_string_format_map(PyObject *self, PyObject *obj)
{
    return do_string_format(self, NULL, obj);
}

没啥头绪,放弃。

#5 Python 源码学习 04: int

2021-07-20

经过源码分析,可以得知,所有类型的初始化都是通过 object.c 中的 INIT_TYPE 语句完成的,比如 INIT_TYPE(&PyLong_Type, "int");(类型本身的定义在各自的 .c 文件中)

然后,可能是通过 SETBUILTIN("int", &PyLong_Type); 注册成内置名称 int。

int 类型最后就指向了一个叫做 PyLong_Type 的 PyTypeObject 类型变量。

PyTypeObject

Objects/longobject.c

/* Type object for int.  The fixed part of an instance ends where ob_digit
   begins (tp_basicsize) and each variable-part item is one digit
   (tp_itemsize).  Numeric slots come from long_as_number, methods from
   long_methods, getters/setters from long_getset, and construction goes
   through long_new. */
PyTypeObject PyLong_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "int",                                      /* tp_name */
    offsetof(PyLongObject, ob_digit),           /* tp_basicsize */
    sizeof(digit),                              /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    long_to_decimal_string,                     /* tp_repr */
    &long_as_number,                            /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    (hashfunc)long_hash,                        /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_LONG_SUBCLASS,               /* tp_flags */
    long_doc,                                   /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    long_richcompare,                           /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    long_methods,                               /* tp_methods */
    0,                                          /* tp_members */
    long_getset,                                /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    long_new,                                   /* tp_new */
    PyObject_Del,                               /* tp_free */
};

其中所有的方法就在 PyMethodDef long_methods 和 PyNumberMethods long_as_number 中。
尤其是 long_as_number 可能就是那些重载操作符的基础。

long_new / long_new_impl

可能是 int 方法对应的实现。

#2 Python 源码学习 02: PyObject

2021-06-19

源码

Include/object.h

/* Nothing is actually declared to be a PyObject, but every pointer to
 * a Python object can be cast to a PyObject*.  This is inheritance built
 * by hand.  Similarly every pointer to a variable-size Python object can,
 * in addition, be cast to PyVarObject*.
 */
typedef struct _object {
    _PyObject_HEAD_EXTRA   // when Py_TRACE_REFS is enabled this adds _ob_next and _ob_prev,
                           // which link all live heap objects into a doubly linked list
    Py_ssize_t ob_refcnt;  // reference count (Py_ssize_t is a signed integer type, not necessarily long)
    PyTypeObject *ob_type; // the object's type
} PyObject;

/* Cast argument to PyObject* type. */
#define _PyObject_CAST(op) ((PyObject*)(op))
#define _PyObject_CAST_CONST(op) ((const PyObject*)(op))

/* Header of a variable-size object: a plain PyObject header followed by
   the item count of the variable part. */
typedef struct {
    PyObject ob_base;
    Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;

/* Cast argument to PyVarObject* type. */
#define _PyVarObject_CAST(op) ((PyVarObject*)(op))

/* Field accessors: each casts its argument to the matching header type
   and reads ob_refcnt, ob_type or ob_size respectively. */
#define Py_REFCNT(ob)           (_PyObject_CAST(ob)->ob_refcnt)
#define Py_TYPE(ob)             (_PyObject_CAST(ob)->ob_type)
#define Py_SIZE(ob)             (_PyVarObject_CAST(ob)->ob_size)

PyObject

相当于所有 Python 对象的父类,包含类型,引用计数等信息。

注释说得很清楚,不会有直接声明的 PyObject 变量,只会有 PyObject* 指针,所有指向 Python 对象的指针都可以转换成 PyObject*。

PyVarObject

变长对象的公共头部:在 PyObject 的基础上增加 ob_size,表示可变部分的元素个数。元素不一定是 PyObject,例如 bytes 的元素是 char(tp_itemsize 为 sizeof(char)),int 的元素是 digit(tp_itemsize 为 sizeof(digit))。