Pythonの初期化
Pythonコマンドが実行されてからインタプリタが始まるまでの流れを追います。Victorがかなり整理してくれたけれどもそれでも複雑。
貼っているソースコードは流れを追いやすいようにエラー処理等を削っています。
Modules/main.c
Windows なら Py_Main(), Unix なら Py_BytesMain() が呼ばれる。こいつらはコマンドライン引数を _PyArgv
に格納して pymain_main()
を呼ぶ。
/* Common entry point reached from Py_Main() / Py_BytesMain(): initialize
   Python from the collected command line arguments, then hand over to
   Py_RunMain() and return its result.

   NOTE: in this trimmed excerpt the PyStatus returned by pymain_init() is
   not inspected; the error/exit handling has been left out for readability. */
static int
pymain_main(_PyArgv *args)
{
    pymain_init(args);
    return Py_RunMain();
}
Py_RunMain()
に入ると初期のimportなどの実行が始まるので、今回追うのは pymain_init()
のみ。
/* --- pymain_init() ---------------------------------------------- */
/* Initialize the runtime, pre-initialize Python from the command line,
   then fill a PyConfig with argv and initialize Python from it.

   NOTE: this excerpt has the per-call status checks removed, so every
   assignment to `status` simply overwrites the previous one and the
   function as shown always returns _PyStatus_OK(). */
static PyStatus
pymain_init(const _PyArgv *args)
{
PyStatus status;
/* Set up the global runtime state. */
status = _PyRuntime_Initialize();
/* Pre-configuration, filled with the "Python" defaults. */
PyPreConfig preconfig;
PyPreConfig_InitPythonConfig(&preconfig);
status = _Py_PreInitializeFromPyArgv(&preconfig, args);
PyConfig config;
PyConfig_InitPythonConfig(&config);
/* pass NULL as the config: config is read from command line arguments,
environment variables, configuration files */
/* NOTE(review): the comment above looks stale; &config (not NULL) is what
   gets passed to Py_InitializeFromConfig() below. */
/* argv is stored either as bytes strings or as wide strings; pick the
   matching setter. */
if (args->use_bytes_argv) {
status = PyConfig_SetBytesArgv(&config, args->argc, args->bytes_argv);
}
else {
status = PyConfig_SetArgv(&config, args->argc, args->wchar_argv);
}
status = Py_InitializeFromConfig(&config);
status = _PyStatus_OK();
/* No goto targets this label in the excerpt; presumably it is the landing
   point of the removed error paths. */
done:
/* config is cleared on the way out regardless of the result. */
PyConfig_Clear(&config);
return status;
}
Python/pylifecycle.c
/* Initialize the global _PyRuntime state on the first call; every later
   call returns success immediately without touching the runtime. */
PyStatus
_PyRuntime_Initialize(void)
{
/* XXX We only initialize once in the process, which aligns with
the static initialization of the former globals now found in
_PyRuntime. However, _PyRuntime *should* be initialized with
every Py_Initialize() call, but doing so breaks the runtime.
This is because the runtime state is not properly finalized
currently. */
if (runtime_initialized) {
return _PyStatus_OK();
}
/* The flag is set before the initialization runs, so a failing
   _PyRuntimeState_Init() is not attempted again by later calls. */
runtime_initialized = 1;
return _PyRuntimeState_Init(&_PyRuntime);
}
Python/pystate.c
/* Run _PyRuntimeState_Init_impl() on `runtime` with the raw-domain memory
   allocator temporarily forced to the default one, and return its status. */
PyStatus
_PyRuntimeState_Init(_PyRuntimeState *runtime)
{
/* Force default allocator, since _PyRuntimeState_Fini() must
use the same allocator than this function. */
PyMemAllocatorEx old_alloc;
/* old_alloc is filled in here and used below to reinstate the allocator
   (presumably the one that was active before this call). */
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
PyStatus status = _PyRuntimeState_Init_impl(runtime);
/* Put the saved allocator back whether or not the init succeeded. */
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
return status;
}
アロケーターをデフォルトに強制してから _PyRuntimeState_Init_impl() を呼び、終わったら old_alloc に退避しておいたアロケーターに戻している。
Python/pystate.c
/* Reset `runtime` to a freshly initialized state.

   The whole structure is zeroed, so the few fields that must survive
   (open-code hook and its userdata, audit hook list head, unicode id
   counter) are saved first and written back afterwards. Then the ceval
   state, the preconfig defaults, the gilstate fields and three locks are
   set up.

   Returns _PyStatus_OK() on success, or _PyStatus_NO_MEMORY() when one of
   the locks cannot be allocated. */
static PyStatus
_PyRuntimeState_Init_impl(_PyRuntimeState *runtime)
{
/* We preserve the hook across init, because there is
currently no public API to set it between runtime
initialization and interpreter initialization. */
void *open_code_hook = runtime->open_code_hook;
void *open_code_userdata = runtime->open_code_userdata;
_Py_AuditHookEntry *audit_hook_head = runtime->audit_hook_head;
// bpo-42882: Preserve next_index value if Py_Initialize()/Py_Finalize()
// is called multiple times.
Py_ssize_t unicode_next_index = runtime->unicode_ids.next_index;
/* Wipe everything; only the values saved above are restored. */
memset(runtime, 0, sizeof(*runtime));
runtime->open_code_hook = open_code_hook;
runtime->open_code_userdata = open_code_userdata;
runtime->audit_hook_head = audit_hook_head;
_PyEval_InitRuntimeState(&runtime->ceval);
/* Fill runtime->preconfig with the "Python" default pre-configuration. */
PyPreConfig_InitPythonConfig(&runtime->preconfig);
runtime->gilstate.check_enabled = 1;
/* A TSS key must be initialized with Py_tss_NEEDS_INIT
in accordance with the specification. */
Py_tss_t initial = Py_tss_NEEDS_INIT;
runtime->gilstate.autoTSSkey = initial;
runtime->interpreters.mutex = PyThread_allocate_lock();
if (runtime->interpreters.mutex == NULL) {
return _PyStatus_NO_MEMORY();
}
runtime->interpreters.next_id = -1;
runtime->xidregistry.mutex = PyThread_allocate_lock();
if (runtime->xidregistry.mutex == NULL) {
/* NOTE(review): returns without releasing interpreters.mutex
   allocated above. */
return _PyStatus_NO_MEMORY();
}
// Set it to the ID of the main thread of the main interpreter.
runtime->main_thread = PyThread_get_thread_ident();
runtime->unicode_ids.lock = PyThread_allocate_lock();
if (runtime->unicode_ids.lock == NULL) {
/* NOTE(review): returns without releasing the two locks allocated
   above. */
return _PyStatus_NO_MEMORY();
}
/* Restore the counter saved before the memset (see bpo-42882 above). */
runtime->unicode_ids.next_index = unicode_next_index;
return _PyStatus_OK();
}
Python/ceval.c
/* Initialize the ceval part of the runtime state: calls _gil_initialize()
   on ceval->gil, unless the build defines
   EXPERIMENTAL_ISOLATED_SUBINTERPRETERS, in which case this is a no-op. */
void
_PyEval_InitRuntimeState(struct _ceval_runtime_state *ceval)
{
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
_gil_initialize(&ceval->gil);
#endif
}
Python/preconfig.c
/* Fill `config` with the "Python" pre-configuration defaults: start from
   the Compat defaults and then override the fields that differ. */
void
PyPreConfig_InitPythonConfig(PyPreConfig *config)
{
/* Baseline values; every assignment below overrides one of them. */
_PyPreConfig_InitCompatConfig(config);
config->_config_init = (int)_PyConfig_INIT_PYTHON;
config->isolated = 0;
config->parse_argv = 1;
config->use_environment = 1;
/* Set to -1 to enable C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
depending on the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE
environment variables. */
config->coerce_c_locale = -1;
config->coerce_c_locale_warn = -1;
config->utf8_mode = -1;
#ifdef MS_WINDOWS
/* Windows-only field: the Compat baseline leaves it at -1, here it is 0. */
config->legacy_windows_fs_encoding = 0;
#endif
}
PyPreConfig_InitPythonConfig(&runtime->preconfig);
で runtime->preconfig
が初期化された。
_PyRuntimeState_Init()
終わり。
_PyRuntime_Initialize()
終わり。 pymain_init()
に戻る。
pymain_init()
PyPreConfig preconfig;
PyPreConfig_InitPythonConfig(&preconfig);
に入っていく。
Python/preconfig.c
/* Fill `config` with the "Python" pre-configuration defaults: start from
   the Compat defaults and then override the fields that differ. */
void
PyPreConfig_InitPythonConfig(PyPreConfig *config)
{
/* Baseline values; every assignment below overrides one of them. */
_PyPreConfig_InitCompatConfig(config);
config->_config_init = (int)_PyConfig_INIT_PYTHON;
config->isolated = 0;
config->parse_argv = 1;
config->use_environment = 1;
/* Set to -1 to enable C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
depending on the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE
environment variables. */
config->coerce_c_locale = -1;
config->coerce_c_locale_warn = -1;
config->utf8_mode = -1;
#ifdef MS_WINDOWS
/* Windows-only field: the Compat baseline leaves it at -1, here it is 0. */
config->legacy_windows_fs_encoding = 0;
#endif
}
/* Fill `config` with the "Compat" pre-configuration defaults.

   The structure is zeroed first, so any field not assigned below is 0.
   Several fields are set to -1 here; presumably that marks them as "not
   decided yet", to be resolved by a later configuration step. */
void
_PyPreConfig_InitCompatConfig(PyPreConfig *config)
{
memset(config, 0, sizeof(*config));
config->_config_init = (int)_PyConfig_INIT_COMPAT;
config->parse_argv = 0;
config->isolated = -1;
config->use_environment = -1;
config->configure_locale = 1;
/* bpo-36443: C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
are disabled by default using the Compat configuration.
Py_UTF8Mode=1 enables the UTF-8 mode. PYTHONUTF8 environment variable
is ignored (even if use_environment=1). */
config->utf8_mode = 0;
config->coerce_c_locale = 0;
config->coerce_c_locale_warn = 0;
config->dev_mode = -1;
/* Allocator choice depends on the build: the experimental isolated
   subinterpreter build forces libc malloc (debug variant under Py_DEBUG),
   every other build leaves the allocator unset. */
#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
/* bpo-40512: pymalloc is not compatible with subinterpreters,
force usage of libc malloc() which is thread-safe. */
#ifdef Py_DEBUG
config->allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
#else
config->allocator = PYMEM_ALLOCATOR_MALLOC;
#endif
#else
config->allocator = PYMEM_ALLOCATOR_NOT_SET;
#endif
#ifdef MS_WINDOWS
config->legacy_windows_fs_encoding = -1;
#endif
}
-1 は未設定で今後設定されるもの。 0/1 は off/on のデフォルトが設定されているけれども、このあと別の設定で上書きされる可能性もある。
pymain_init()
status = _Py_PreInitializeFromPyArgv(&preconfig, args);
Python/pylifecycle.c
/* Pre-initialize Python from `src_config` and, through `args`, the command
   line.

   Does nothing when the runtime is already marked as preinitialized.
   Otherwise it works on a local PyPreConfig derived from `src_config`, reads
   the configuration into it, writes it out, and marks the runtime as
   preinitialized.

   NOTE: this excerpt has the status checks removed, so the three `status`
   assignments are never inspected and the function as shown always returns
   _PyStatus_OK(). */
PyStatus
_Py_PreInitializeFromPyArgv(const PyPreConfig *src_config, const _PyArgv *args)
{
PyStatus status;
_PyRuntimeState *runtime = &_PyRuntime;
if (runtime->preinitialized) {
/* If it's already configured: ignored the new configuration */
return _PyStatus_OK();
}
/* Flag is raised only for the duration of the steps below. */
runtime->preinitializing = 1;
/* src_config is const, so the work happens on a local copy. */
PyPreConfig config;
status = _PyPreConfig_InitFromPreConfig(&config, src_config);
status = _PyPreConfig_Read(&config, args);
/* Presumably applies the resulting configuration; _PyPreConfig_Write()
   is not part of this excerpt. */
status = _PyPreConfig_Write(&config);
runtime->preinitializing = 0;
runtime->preinitialized = 1;
return _PyStatus_OK();
}
src_config は PyPreConfig_InitPythonConfig()
でデフォルト値に初期化されたやつ。
Python/preconfig.c
/* Initialize `config` with the "Python" defaults and then apply
   preconfig_copy() from `config2` onto it (presumably copying config2's
   fields into config). Always returns _PyStatus_OK(). */
PyStatus
_PyPreConfig_InitFromPreConfig(PyPreConfig *config,
const PyPreConfig *config2)
{
PyPreConfig_InitPythonConfig(config);
preconfig_copy(config, config2);
return _PyStatus_OK();
}
InitPythonConfig してそこに config2 をコピーしてるけど、 config2 も InitPythonConfig で初期化しているので実質なにもしてない(pythonコマンドじゃなくてPythonをライブラリとしてembedしているときは違う手順になることもあるのだろう)。
_PyPreConfig_Read(). これが本体ぽい。
/* Read the configuration from:
- command line arguments
- environment variables
- Py_xxx global configuration variables
- the LC_CTYPE locale */
/* The read is repeated when it changes the encoding settings (C locale
   coercion or UTF-8 Mode), because the command line may then have to be
   decoded again; a watchdog aborts with an error on the third pass.

   On exit the LC_CTYPE locale, Py_UTF8Mode and (on Windows)
   Py_LegacyWindowsFSEncodingFlag are put back to the values they had on
   entry; the result of the read is left in `config`.

   `args` may be NULL, in which case no command line is set.
   Returns _PyStatus_OK() on success, otherwise the failing status. */
PyStatus
_PyPreConfig_Read(PyPreConfig *config, const _PyArgv *args)
{
PyStatus status;
status = _PyRuntime_Initialize();
if (_PyStatus_EXCEPTION(status)) {
return status;
}
preconfig_get_global_vars(config);
/* Copy LC_CTYPE locale, since it's modified later */
const char *loc = setlocale(LC_CTYPE, NULL);
if (loc == NULL) {
return _PyStatus_ERR("failed to LC_CTYPE locale");
}
char *init_ctype_locale = _PyMem_RawStrdup(loc);
if (init_ctype_locale == NULL) {
return _PyStatus_NO_MEMORY();
}
/* Save the config to be able to restore it if encodings change */
PyPreConfig save_config;
status = _PyPreConfig_InitFromPreConfig(&save_config, config);
if (_PyStatus_EXCEPTION(status)) {
/* NOTE(review): this return skips the cleanup at `done`, so
   init_ctype_locale would not be freed. It cannot trigger as long as
   _PyPreConfig_InitFromPreConfig() always returns _PyStatus_OK(). */
return status;
}
/* Set LC_CTYPE to the user preferred locale */
if (config->configure_locale) {
_Py_SetLocaleFromEnv(LC_CTYPE);
}
_PyPreCmdline cmdline = _PyPreCmdline_INIT;
/* Remember the global flags so they can be restored at `done`. */
int init_utf8_mode = Py_UTF8Mode;
#ifdef MS_WINDOWS
int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
#endif
/* Set once the coercion branch has run, so it runs at most once. */
int locale_coerced = 0;
int loops = 0;
while (1) {
/* Value before this pass; compared after the read to detect a change. */
int utf8_mode = config->utf8_mode;
/* Watchdog to prevent an infinite loop */
loops++;
if (loops == 3) {
status = _PyStatus_ERR("Encoding changed twice while "
"reading the configuration");
goto done;
}
/* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
Py_UTF8Mode = config->utf8_mode;
#ifdef MS_WINDOWS
Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
#endif
if (args) {
// Set command line arguments at each iteration. If they are bytes
// strings, they are decoded from the new encoding.
status = _PyPreCmdline_SetArgv(&cmdline, args);
if (_PyStatus_EXCEPTION(status)) {
goto done;
}
}
status = preconfig_read(config, &cmdline);
if (_PyStatus_EXCEPTION(status)) {
goto done;
}
/* The legacy C locale assumes ASCII as the default text encoding, which
* causes problems not only for the CPython runtime, but also other
* components like GNU readline.
*
* Accordingly, when the CLI detects it, it attempts to coerce it to a
* more capable UTF-8 based alternative.
*
* See the documentation of the PYTHONCOERCECLOCALE setting for more
* details.
*/
int encoding_changed = 0;
if (config->coerce_c_locale && !locale_coerced) {
locale_coerced = 1;
_Py_CoerceLegacyLocale(0);
encoding_changed = 1;
}
/* Did this pass change UTF-8 Mode? Starting from -1, only ending up at 1
   counts as a change; from any other value, any difference counts. */
if (utf8_mode == -1) {
if (config->utf8_mode == 1) {
/* UTF-8 Mode enabled */
encoding_changed = 1;
}
}
else {
if (config->utf8_mode != utf8_mode) {
encoding_changed = 1;
}
}
if (!encoding_changed) {
break;
}
/* Reset the configuration before reading again the configuration,
just keep UTF-8 Mode and coerce C locale value. */
int new_utf8_mode = config->utf8_mode;
int new_coerce_c_locale = config->coerce_c_locale;
preconfig_copy(config, &save_config);
config->utf8_mode = new_utf8_mode;
config->coerce_c_locale = new_coerce_c_locale;
/* The encoding changed: read again the configuration
with the new encoding */
}
status = _PyStatus_OK();
/* Common exit: undo the temporary changes to process-wide state. */
done:
if (init_ctype_locale != NULL) {
setlocale(LC_CTYPE, init_ctype_locale);
PyMem_RawFree(init_ctype_locale);
}
Py_UTF8Mode = init_utf8_mode ;
#ifdef MS_WINDOWS
Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
#endif
_PyPreCmdline_Clear(&cmdline);
return status;
}