🐍
cpython source code reading (3.10.13), 1/n
(この記事は書きかけです。)
概要
何回かに分けて下記を理解していく予定です。
- .pyが解釈されて実行されるまで
- list型の詳細
- dict型の詳細
対象のソースコードはこれです。Linuxのコードを読んでいきます。
Python-3.10.13.tgz
まず、
"* .pyが解釈されて実行されるまで"
について、正常系の処理の流れを追っていきます。
main()
main()を検索するところから始めます。
Programs/python.c
/* Minimal main program -- everything is loaded from the library */
#include "Python.h"
#ifdef MS_WINDOWS
int
wmain(int argc, wchar_t **argv)
{
return Py_Main(argc, argv);
}
#else
int
main(int argc, char **argv)
{
return Py_BytesMain(argc, argv);
}
#endif
Linuxなので#else側です。
どんどん追って行きます。
Modules/main.c
Py_BytesMain(int argc, char **argv)
pymain_main(_PyArgv *args)
Py_RunMain(void)
pymain_run_python(int *exitcode)
pymain_run_file(const PyConfig *config)
pymain_run_file_obj(PyObject *program_name, PyObject *filename,
Python/pythonrun.c
_PyRun_AnyFileObject(FILE *fp, PyObject *filename, int closeit,
PyCompilerFlags *flags)
_PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit,
PyCompilerFlags *flags)
.py か .pyc か?
.pycは.pyをコンパイルしたバイトコード。
Python/pythonrun.c
int
_PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit,
PyCompilerFlags *flags)
{
...
if (pyc) {
FILE *pyc_fp;
/* Try to run a pyc file. First, re-open in binary */
if (closeit) {
fclose(fp);
}
pyc_fp = _Py_fopen_obj(filename, "rb");
if (pyc_fp == NULL) {
fprintf(stderr, "python: Can't reopen .pyc file\n");
goto done;
}
if (set_main_loader(d, filename, "SourcelessFileLoader") < 0) {
fprintf(stderr, "python: failed to set __main__.__loader__\n");
ret = -1;
fclose(pyc_fp);
goto done;
}
v = run_pyc_file(pyc_fp, d, d, flags);
} else {
/* When running from stdin, leave __main__.__loader__ alone */
if (PyUnicode_CompareWithASCIIString(filename, "<stdin>") != 0 &&
set_main_loader(d, filename, "SourceFileLoader") < 0) {
fprintf(stderr, "python: failed to set __main__.__loader__\n");
ret = -1;
goto done;
}
v = pyrun_file(fp, filename, Py_file_input, d, d,
closeit, flags);
}
.py の場合
Python/pythonrun.c
pyrun_file(FILE *fp, PyObject *filename, int start, PyObject *globals,
PyObject *locals, int closeit, PyCompilerFlags *flags)
run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals,
PyCompilerFlags *flags, PyArena *arena)
run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, PyObject *locals)
Python/ceval.c
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
_PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con,
PyObject *locals,
PyObject* const* args, size_t argcount,
PyObject *kwnames)
.pyc の場合
Python/pythonrun.c
run_pyc_file(FILE *fp, PyObject *globals, PyObject *locals,
PyCompilerFlags *flags)
run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, PyObject *locals)
Python/ceval.c
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
.pyの場合と合流しました。
PyFrameObjectとは?
あとで調べる
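frameがなんの単位かわからないけど、code segment, symbolなどを保持しているので、ソースコードの解析結果の中間データなのだと思う。2つ以上存在することがあるのか?
(補足: 下に引用した struct _frame には f_back(previous frame, or NULL)があり、frameは前のframeへのリンクを持つので、2つ以上存在しうる。また f_lasti(最後に実行した命令)、f_valuestack / f_stackdepth(値スタック)といった実行中の状態も保持しているので、解析結果の中間データというより、code objectを実行するときの実行コンテキスト(おそらく関数呼び出しなどの単位で作られるもの)と考えたほうがよさそう。要確認。)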
Python/ceval.c
PyFrameObject *f = _PyEval_MakeFrameVector(
tstate, con, locals, args, argcount, kwnames);
Include/internal/pycore_ceval.h
static inline PyObject*
_PyEval_EvalFrame(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
return tstate->interp->eval_frame(tstate, f, throwflag);
}
Include/pyframe.h
typedef struct _frame PyFrameObject;
Include/cpython/frameobject.h
struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
PyObject *f_trace; /* Trace function */
int f_stackdepth; /* Depth of value stack */
char f_trace_lines; /* Emit per-line trace events? */
char f_trace_opcodes; /* Emit per-opcode trace events? */
/* Borrowed reference to a generator, or NULL */
PyObject *f_gen;
int f_lasti; /* Last instruction if called */
int f_lineno; /* Current line number. Only valid if non-zero */
int f_iblock; /* index in f_blockstack */
PyFrameState f_state; /* What state the frame is in */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
};
Python/pystate.c
PyInterpreterState_New(void)
interp->eval_frame = _PyEval_EvalFrameDefault;
Python/ceval.c
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
...
main_loop:
for (;;) {
...
dispatch_opcode:
#ifdef DYNAMIC_EXECUTION_PROFILE
#ifdef DXPAIRS
dxpairs[lastopcode][opcode]++;
lastopcode = opcode;
#endif
dxp[opcode]++;
#endif
switch (opcode) {
/* BEWARE!
It is essential that any operation that fails must goto error
and that all operation that succeed call DISPATCH() ! */
case TARGET(NOP): {
DISPATCH();
}
...
} /* switch */
/* This should never be reached. Every opcode should end with DISPATCH()
or goto error. */
Py_UNREACHABLE();
これがメインループ(main_loop: ラベル以下の for (;;))っぽい。
ほとんどのcaseの最後にDISPATCH()マクロがある。
Py_UNREACHABLE(); とあるので、バグがなければDISPATCH()を経由して次のopcodeを実行することになる。
opcode
Include/opcode.h
/* Auto-generated by Tools/scripts/generate_opcode_h.py from Lib/opcode.py */
#ifndef Py_OPCODE_H
#define Py_OPCODE_H
#ifdef __cplusplus
extern "C" {
#endif
/* Instruction opcodes for compiled code */
#define POP_TOP 1
#define ROT_TWO 2
#define ROT_THREE 3
#define DUP_TOP 4
...
opcodeが160個くらいある
DISPATCH()とは?
Python/ceval.c
#define DISPATCH() \
{ \
if (trace_info.cframe.use_tracing OR_DTRACE_LINE OR_LLTRACE) { \
goto tracing_dispatch; \
} \
f->f_lasti = INSTR_OFFSET(); \
NEXTOPARG(); \
goto *opcode_targets[opcode]; \
}
tracing_dispatch:
Python/opcode_targets.h
static void *opcode_targets[256] = {
&&_unknown_opcode,
&&TARGET_POP_TOP,
&&TARGET_ROT_TWO,
...
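TARGET_POP_TOP などはどこで定義されているか不明?
→ おそらく Python/ceval.c の TARGET() マクロで定義されている。computed goto が有効な場合、TARGET(op) は「op: TARGET_##op」に展開されるので、上で見た case TARGET(NOP): は case NOP: TARGET_NOP: となり、TARGET_NOP というラベルがその場で定義される(要確認)。opcode_targets の &&TARGET_POP_TOP は、GCC拡張の Labels as Values でそのラベルのアドレスを取っており、DISPATCH() の goto *opcode_targets[opcode]; でそこへジャンプする。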
Discussion