Open13

Pythonの初期化

methanemethane

Pythonコマンドが実行されてからインタプリタが始まるまでの流れを追います。Victorがかなり整理してくれたけれどもそれでも複雑。
貼っているソースコードは流れを追いやすいようにエラー処理等を削っています。

Modules/main.c

Windows なら Py_Main(), Unix なら Py_BytesMain() が呼ばれる。こいつらはコマンドライン引数を _PyArgv に格納して pymain_main() を呼ぶ。

/* Common entry point reached from Py_Main() and Py_BytesMain(): initialize
   Python from the collected command line arguments, then run the main
   program and return the result of Py_RunMain().

   The status returned by pymain_init() is not inspected in this trimmed
   excerpt. */
static int
pymain_main(_PyArgv *args)
{
    pymain_init(args);
    return Py_RunMain();
}

Py_RunMain() に入ると初期のimportなどの実行が始まるので、今回追うのは pymain_init() のみ。

methanemethane
/* --- pymain_init() ---------------------------------------------- */

/* Initialize Python for the command line program: initialize the runtime
   state, preinitialize from the command line arguments, fill a PyConfig
   with argv and initialize Python from that config.

   In this trimmed excerpt every intermediate status is overwritten without
   being checked, so the function always returns _PyStatus_OK() and nothing
   jumps to the `done` label. */
static PyStatus
pymain_init(const _PyArgv *args)
{
    PyStatus status;
    /* Step 1: one-time initialization of the global runtime state. */
    status = _PyRuntime_Initialize();

    /* Step 2: preinitialize, starting from the Python preconfig defaults. */
    PyPreConfig preconfig;
    PyPreConfig_InitPythonConfig(&preconfig);

    status = _Py_PreInitializeFromPyArgv(&preconfig, args);

    /* Step 3: build the main configuration. */
    PyConfig config;
    PyConfig_InitPythonConfig(&config);

    /* pass NULL as the config: config is read from command line arguments,
       environment variables, configuration files
       NOTE(review): the calls below pass &config, not NULL; this wording
       looks stale -- confirm. */
    if (args->use_bytes_argv) {
        /* argv was supplied as bytes strings. */
        status = PyConfig_SetBytesArgv(&config, args->argc, args->bytes_argv);
    }
    else {
        /* argv was supplied as wide character strings. */
        status = PyConfig_SetArgv(&config, args->argc, args->wchar_argv);
    }

    status = Py_InitializeFromConfig(&config);
    /* The previous status is discarded here. */
    status = _PyStatus_OK();

done:
    /* The config is cleared before returning. */
    PyConfig_Clear(&config);
    return status;
}
methanemethane

Python/pylifecycle.c

/* Initialize the global _PyRuntime state the first time this is called;
   every later call is a no-op that reports success. */
PyStatus
_PyRuntime_Initialize(void)
{
    /* XXX Initialization happens only once per process, mirroring the static
       initialization of the former globals that now live in _PyRuntime.
       _PyRuntime *should* be initialized on every Py_Initialize() call, but
       doing so breaks the runtime because the runtime state is currently
       not properly finalized. */
    if (!runtime_initialized) {
        /* Mark as initialized before running the actual initialization. */
        runtime_initialized = 1;
        return _PyRuntimeState_Init(&_PyRuntime);
    }

    return _PyStatus_OK();
}
methanemethane

Python/pystate.c

/* Run _PyRuntimeState_Init_impl() with the default allocator installed for
   the raw memory domain, then put the previous allocator back.
   Returns the status produced by _PyRuntimeState_Init_impl(). */
PyStatus
_PyRuntimeState_Init(_PyRuntimeState *runtime)
{
    PyMemAllocatorEx saved_raw_alloc;
    PyStatus result;

    /* _PyRuntimeState_Fini() must use the same allocator as this function,
       so the default one is forced for the duration of the call. */
    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &saved_raw_alloc);
    result = _PyRuntimeState_Init_impl(runtime);
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &saved_raw_alloc);

    return result;
}

アロケーターをデフォルトに強制してから _PyRuntimeState_Init_impl() を呼び、終わったら元のアロケーターに戻す。

methanemethane

Python/pystate.c

/* Reset *runtime to zero and fill in its initial state.

   A few fields are saved before the memset() and written back afterwards
   (open-code hook and its user data, audit hook list head, and the unicode
   ids next_index).  Three locks are allocated; if any allocation fails the
   function returns _PyStatus_NO_MEMORY(), otherwise _PyStatus_OK(). */
static PyStatus
_PyRuntimeState_Init_impl(_PyRuntimeState *runtime)
{
    /* We preserve the hook across init, because there is
       currently no public API to set it between runtime
       initialization and interpreter initialization. */
    void *open_code_hook = runtime->open_code_hook;
    void *open_code_userdata = runtime->open_code_userdata;
    _Py_AuditHookEntry *audit_hook_head = runtime->audit_hook_head;
    // bpo-42882: Preserve next_index value if Py_Initialize()/Py_Finalize()
    // is called multiple times.
    Py_ssize_t unicode_next_index = runtime->unicode_ids.next_index;

    /* Wipe the whole structure; everything not restored below starts at 0. */
    memset(runtime, 0, sizeof(*runtime));

    /* Write back the values saved above. */
    runtime->open_code_hook = open_code_hook;
    runtime->open_code_userdata = open_code_userdata;
    runtime->audit_hook_head = audit_hook_head;

    _PyEval_InitRuntimeState(&runtime->ceval);

    /* Give the runtime's own preconfig the Python defaults. */
    PyPreConfig_InitPythonConfig(&runtime->preconfig);

    runtime->gilstate.check_enabled = 1;

    /* A TSS key must be initialized with Py_tss_NEEDS_INIT
       in accordance with the specification. */
    Py_tss_t initial = Py_tss_NEEDS_INIT;
    runtime->gilstate.autoTSSkey = initial;

    runtime->interpreters.mutex = PyThread_allocate_lock();
    if (runtime->interpreters.mutex == NULL) {
        return _PyStatus_NO_MEMORY();
    }
    runtime->interpreters.next_id = -1;

    runtime->xidregistry.mutex = PyThread_allocate_lock();
    if (runtime->xidregistry.mutex == NULL) {
        /* NOTE(review): the lock allocated above is not released on this
           path; presumably a finalizer elsewhere handles it -- confirm. */
        return _PyStatus_NO_MEMORY();
    }

    // Set it to the ID of the main thread of the main interpreter.
    runtime->main_thread = PyThread_get_thread_ident();

    runtime->unicode_ids.lock = PyThread_allocate_lock();
    if (runtime->unicode_ids.lock == NULL) {
        return _PyStatus_NO_MEMORY();
    }
    /* Restore the index saved before the memset() (see bpo-42882 above). */
    runtime->unicode_ids.next_index = unicode_next_index;

    return _PyStatus_OK();
}
methanemethane

Python/ceval.c

/* Initialize the eval-related part of the runtime state.  The only work
   done here is initializing ceval->gil, and that is compiled out when
   EXPERIMENTAL_ISOLATED_SUBINTERPRETERS is defined. */
void
_PyEval_InitRuntimeState(struct _ceval_runtime_state *ceval)
{
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
    _gil_initialize(&ceval->gil);
#endif
}
methanemethane

Python/preconfig.c

/* Fill *config with the defaults of the "Python" preconfiguration: start
   from the compat defaults, then override the fields that differ. */
void
PyPreConfig_InitPythonConfig(PyPreConfig *config)
{
    _PyPreConfig_InitCompatConfig(config);

    config->_config_init = (int)_PyConfig_INIT_PYTHON;

    /* Command line parsing and environment variables are enabled;
       isolated mode is off. */
    config->parse_argv = 1;
    config->use_environment = 1;
    config->isolated = 0;

    /* -1 enables C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
       depending on the LC_CTYPE locale and on the PYTHONUTF8 and
       PYTHONCOERCECLOCALE environment variables. */
    config->utf8_mode = -1;
    config->coerce_c_locale = -1;
    config->coerce_c_locale_warn = -1;

#ifdef MS_WINDOWS
    config->legacy_windows_fs_encoding = 0;
#endif
}

PyPreConfig_InitPythonConfig(&runtime->preconfig); により runtime->preconfig が初期化された。

_PyRuntimeState_Init() 終わり。

_PyRuntime_Initialize() 終わり。 pymain_init() に戻る。

methanemethane

pymain_init()

    PyPreConfig preconfig;
    PyPreConfig_InitPythonConfig(&preconfig);

に入っていく。

Python/preconfig.c

/* Fill *config with the defaults of the "Python" preconfiguration: start
   from the compat defaults, then override the fields that differ. */
void
PyPreConfig_InitPythonConfig(PyPreConfig *config)
{
    _PyPreConfig_InitCompatConfig(config);

    config->_config_init = (int)_PyConfig_INIT_PYTHON;

    /* Command line parsing and environment variables are enabled;
       isolated mode is off. */
    config->parse_argv = 1;
    config->use_environment = 1;
    config->isolated = 0;

    /* -1 enables C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
       depending on the LC_CTYPE locale and on the PYTHONUTF8 and
       PYTHONCOERCECLOCALE environment variables. */
    config->utf8_mode = -1;
    config->coerce_c_locale = -1;
    config->coerce_c_locale_warn = -1;

#ifdef MS_WINDOWS
    config->legacy_windows_fs_encoding = 0;
#endif
}
/* Fill *config with the defaults of the "Compat" preconfiguration.

   The structure is zeroed first, then individual fields are set.  The
   allocator default depends on EXPERIMENTAL_ISOLATED_SUBINTERPRETERS and
   Py_DEBUG; legacy_windows_fs_encoding is only set on MS_WINDOWS builds. */
void
_PyPreConfig_InitCompatConfig(PyPreConfig *config)
{
    memset(config, 0, sizeof(*config));

    config->_config_init = (int)_PyConfig_INIT_COMPAT;
    config->parse_argv = 0;
    config->isolated = -1;
    config->use_environment = -1;
    config->configure_locale = 1;

    /* bpo-36443: C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
       are disabled by default using the Compat configuration.

       Py_UTF8Mode=1 enables the UTF-8 mode. PYTHONUTF8 environment variable
       is ignored (even if use_environment=1). */
    config->utf8_mode = 0;
    config->coerce_c_locale = 0;
    config->coerce_c_locale_warn = 0;

    config->dev_mode = -1;
#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
    /* bpo-40512: pymalloc is not compatible with subinterpreters,
       force usage of libc malloc() which is thread-safe. */
#ifdef Py_DEBUG
    config->allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
#else
    config->allocator = PYMEM_ALLOCATOR_MALLOC;
#endif
#else
    /* Regular builds: no allocator is chosen at this point. */
    config->allocator = PYMEM_ALLOCATOR_NOT_SET;
#endif
#ifdef MS_WINDOWS
    config->legacy_windows_fs_encoding = -1;
#endif
}
methanemethane

-1 は未設定で今後設定されるもの。 0/1 は off/on のデフォルトが設定されているけれども、このあと別の設定で上書きされる可能性もある。

methanemethane

pymain_init()

    status = _Py_PreInitializeFromPyArgv(&preconfig, args);

Python/pylifecycle.c

PyStatus
_Py_PreInitializeFromPyArgv(const PyPreConfig *src_config, const _PyArgv *args)
{
    PyStatus status;
    _PyRuntimeState *runtime = &_PyRuntime;

    if (runtime->preinitialized) {
        /* If it's already configured: ignored the new configuration */
        return _PyStatus_OK();
    }
    runtime->preinitializing = 1;

    PyPreConfig config;
    status = _PyPreConfig_InitFromPreConfig(&config, src_config);
    status = _PyPreConfig_Read(&config, args);
    status = _PyPreConfig_Write(&config);

    runtime->preinitializing = 0;
    runtime->preinitialized = 1;
    return _PyStatus_OK();
}
methanemethane

src_config は PyPreConfig_InitPythonConfig() でデフォルト値に初期化されたやつ。

methanemethane

Python/preconfig.c

/* Initialize *config with the Python preconfig defaults and then copy
   config2 over it.  Always returns _PyStatus_OK(). */
PyStatus
_PyPreConfig_InitFromPreConfig(PyPreConfig *config,
                               const PyPreConfig *config2)
{
    /* Defaults first, so *config is fully initialized before the copy. */
    PyPreConfig_InitPythonConfig(config);
    preconfig_copy(config, config2);
    return _PyStatus_OK();
}

InitPythonConfig してそこに config2 をコピーしてるけど、 config2 も InitPythonConfig で初期化しているので実質なにもしてない(pythonコマンドじゃなくてPythonをライブラリとしてembedしているときは違う手順になることもあるのだろう)。

methanemethane

_PyPreConfig_Read(). これが本体ぽい。

/* Read the configuration from:

   - command line arguments
   - environment variables
   - Py_xxx global configuration variables
   - the LC_CTYPE locale

   config is updated in place.  args may be NULL, in which case no command
   line arguments are set on the pre-cmdline.

   The configuration is read in a loop: if reading it changes the encoding
   (C locale coercion or UTF-8 Mode), the config is reset from a saved copy
   and read again; a watchdog aborts with an error on the third pass.

   Before returning from the main path, the LC_CTYPE locale, Py_UTF8Mode and
   (on Windows) Py_LegacyWindowsFSEncodingFlag are restored to the values
   they had on entry.

   Returns _PyStatus_OK() on success, or an error / no-memory status. */
PyStatus
_PyPreConfig_Read(PyPreConfig *config, const _PyArgv *args)
{
    PyStatus status;

    status = _PyRuntime_Initialize();
    if (_PyStatus_EXCEPTION(status)) {
        return status;
    }

    preconfig_get_global_vars(config);

    /* Copy LC_CTYPE locale, since it's modified later */
    const char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        return _PyStatus_ERR("failed to LC_CTYPE locale");
    }
    char *init_ctype_locale = _PyMem_RawStrdup(loc);
    if (init_ctype_locale == NULL) {
        return _PyStatus_NO_MEMORY();
    }

    /* Save the config to be able to restore it if encodings change */
    PyPreConfig save_config;

    status = _PyPreConfig_InitFromPreConfig(&save_config, config);
    if (_PyStatus_EXCEPTION(status)) {
        /* The locale has not been modified yet, so only the copy needs to
           be released.  `goto done` cannot be used here: cmdline is not
           initialized yet and `done` clears it. */
        PyMem_RawFree(init_ctype_locale);
        return status;
    }

    /* Set LC_CTYPE to the user preferred locale */
    if (config->configure_locale) {
        _Py_SetLocaleFromEnv(LC_CTYPE);
    }

    _PyPreCmdline cmdline = _PyPreCmdline_INIT;
    /* Remember the global flags so they can be restored at `done`. */
    int init_utf8_mode = Py_UTF8Mode;
#ifdef MS_WINDOWS
    int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
#endif

    int locale_coerced = 0;
    int loops = 0;

    while (1) {
        /* UTF-8 Mode value before this pass, used to detect a change. */
        int utf8_mode = config->utf8_mode;

        /* Watchdog to prevent an infinite loop */
        loops++;
        if (loops == 3) {
            status = _PyStatus_ERR("Encoding changed twice while "
                                   "reading the configuration");
            goto done;
        }

        /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
           on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
        Py_UTF8Mode = config->utf8_mode;
#ifdef MS_WINDOWS
        Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
#endif

        if (args) {
            // Set command line arguments at each iteration. If they are bytes
            // strings, they are decoded from the new encoding.
            status = _PyPreCmdline_SetArgv(&cmdline, args);
            if (_PyStatus_EXCEPTION(status)) {
                goto done;
            }
        }

        status = preconfig_read(config, &cmdline);
        if (_PyStatus_EXCEPTION(status)) {
            goto done;
        }

        /* The legacy C locale assumes ASCII as the default text encoding, which
         * causes problems not only for the CPython runtime, but also other
         * components like GNU readline.
         *
         * Accordingly, when the CLI detects it, it attempts to coerce it to a
         * more capable UTF-8 based alternative.
         *
         * See the documentation of the PYTHONCOERCECLOCALE setting for more
         * details.
         */
        int encoding_changed = 0;
        if (config->coerce_c_locale && !locale_coerced) {
            /* Coerce at most once per call. */
            locale_coerced = 1;
            _Py_CoerceLegacyLocale(0);
            encoding_changed = 1;
        }

        if (utf8_mode == -1) {
            if (config->utf8_mode == 1) {
                /* UTF-8 Mode enabled */
                encoding_changed = 1;
            }
        }
        else {
            if (config->utf8_mode != utf8_mode) {
                encoding_changed = 1;
            }
        }

        if (!encoding_changed) {
            break;
        }

        /* Reset the configuration before reading again the configuration,
           just keep UTF-8 Mode and coerce C locale value. */
        int new_utf8_mode = config->utf8_mode;
        int new_coerce_c_locale = config->coerce_c_locale;
        preconfig_copy(config, &save_config);
        config->utf8_mode = new_utf8_mode;
        config->coerce_c_locale = new_coerce_c_locale;

        /* The encoding changed: read again the configuration
           with the new encoding */
    }
    status = _PyStatus_OK();

done:
    /* Restore the LC_CTYPE locale and the global flags saved on entry. */
    if (init_ctype_locale != NULL) {
        setlocale(LC_CTYPE, init_ctype_locale);
        PyMem_RawFree(init_ctype_locale);
    }
    Py_UTF8Mode = init_utf8_mode;
#ifdef MS_WINDOWS
    Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
#endif
    _PyPreCmdline_Clear(&cmdline);
    return status;
}