🤖
Pythonで__init__.pyは必須ではなかった
__init__.py
はいらない子になっていた?
Pythonでのパッケージの作成方法をネット上で調べていると、「__init__.py を設置しないといけない」とか「__init__.py がないとパッケージとして認識されない」というように、__init__.py を必須とする情報が多く見つかります。
ところが、ちゃんと勉強しようと思って "シリコンバレー一流プログラマーが教える Pythonプロフェッショナル大全" を読んでいたところ、"Python 3.3以降は__init__.pyが不要" と記載されていました。
確認
こちらの記事で仕様の定義など、より詳しい情報も記載いただいていたので蛇足になりますが、一応動作を確認しました。
コード
.
├── sample.py
└── sample_package
    └── utils.py
sample.py
import sample_package.utils as utils
utils.echo("hello")
sample_package/utils.py
def echo(word: str):
print(word)
Python 3.12
$ python --version
Python 3.12.2
$ python sample.py
hello
Python 3.2(3.3 より前のバージョン)
$ python --version
Python 3.2.6
$ python sample.py
Traceback (most recent call last):
File "sample.py", line 1, in <module>
import sample_package.utils as utils
ImportError: No module named sample_package.utils
エラーになります。__init__.py を設置してから、もう一度実行してみます。
$ echo "" > sample_package/__init__.py
$ python sample.py
hello
エラーなく実行されました。
さいごに
__init__.py は、ディレクトリをパッケージとして認識させる用途だけではなく、import時の初期化処理や、* でインポートされた場合に公開する名前を決める __all__ の定義を記載するなど、他にも用途があります。そのため、あくまで「必須ではない」だけであり、多くのパッケージでは引き続き利用されているものである、という認識です。
pandas では、多くの依存チェックや初期化処理などが記載されていました。
pandasの `__init__.py`
from __future__ import annotations
import os
import warnings
__docformat__ = "restructuredtext"
# Let users know if they're missing any of our hard dependencies
_hard_dependencies = ("numpy", "pytz", "dateutil")
_missing_dependencies = []
for _dependency in _hard_dependencies:
try:
__import__(_dependency)
except ImportError as _e: # pragma: no cover
_missing_dependencies.append(f"{_dependency}: {_e}")
if _missing_dependencies: # pragma: no cover
raise ImportError(
"Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
)
del _hard_dependencies, _dependency, _missing_dependencies
try:
# numpy compat
from pandas.compat import (
is_numpy_dev as _is_numpy_dev, # pyright: ignore[reportUnusedImport] # noqa: F401
)
except ImportError as _err: # pragma: no cover
_module = _err.name
raise ImportError(
f"C extension: {_module} not built. If you want to import "
"pandas from the source directory, you may need to run "
"'python setup.py build_ext' to build the C extensions first."
) from _err
from pandas._config import (
get_option,
set_option,
reset_option,
describe_option,
option_context,
options,
)
# let init-time option registration happen
import pandas.core.config_init # pyright: ignore[reportUnusedImport] # noqa: F401
from pandas.core.api import (
# dtype
ArrowDtype,
Int8Dtype,
Int16Dtype,
Int32Dtype,
Int64Dtype,
UInt8Dtype,
UInt16Dtype,
UInt32Dtype,
UInt64Dtype,
Float32Dtype,
Float64Dtype,
CategoricalDtype,
PeriodDtype,
IntervalDtype,
DatetimeTZDtype,
StringDtype,
BooleanDtype,
# missing
NA,
isna,
isnull,
notna,
notnull,
# indexes
Index,
CategoricalIndex,
RangeIndex,
MultiIndex,
IntervalIndex,
TimedeltaIndex,
DatetimeIndex,
PeriodIndex,
IndexSlice,
# tseries
NaT,
Period,
period_range,
Timedelta,
timedelta_range,
Timestamp,
date_range,
bdate_range,
Interval,
interval_range,
DateOffset,
# conversion
to_numeric,
to_datetime,
to_timedelta,
# misc
Flags,
Grouper,
factorize,
unique,
value_counts,
NamedAgg,
array,
Categorical,
set_eng_float_format,
Series,
DataFrame,
)
from pandas.core.dtypes.dtypes import SparseDtype
from pandas.tseries.api import infer_freq
from pandas.tseries import offsets
from pandas.core.computation.api import eval
from pandas.core.reshape.api import (
concat,
lreshape,
melt,
wide_to_long,
merge,
merge_asof,
merge_ordered,
crosstab,
pivot,
pivot_table,
get_dummies,
from_dummies,
cut,
qcut,
)
from pandas import api, arrays, errors, io, plotting, tseries
from pandas import testing
from pandas.util._print_versions import show_versions
from pandas.io.api import (
# excel
ExcelFile,
ExcelWriter,
read_excel,
# parsers
read_csv,
read_fwf,
read_table,
# pickle
read_pickle,
to_pickle,
# pytables
HDFStore,
read_hdf,
# sql
read_sql,
read_sql_query,
read_sql_table,
# misc
read_clipboard,
read_parquet,
read_orc,
read_feather,
read_gbq,
read_html,
read_xml,
read_json,
read_stata,
read_sas,
read_spss,
)
from pandas.io.json._normalize import json_normalize
from pandas.util._tester import test
# use the closest tagged version if possible
_built_with_meson = False
try:
from pandas._version_meson import ( # pyright: ignore [reportMissingImports]
__version__,
__git_version__,
)
_built_with_meson = True
except ImportError:
from pandas._version import get_versions
v = get_versions()
__version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid")
del get_versions, v
# GH#55043 - deprecation of the data_manager option
if "PANDAS_DATA_MANAGER" in os.environ:
warnings.warn(
"The env variable PANDAS_DATA_MANAGER is set. The data_manager option is "
"deprecated and will be removed in a future version. Only the BlockManager "
"will be available. Unset this environment variable to silence this warning.",
FutureWarning,
stacklevel=2,
)
del warnings, os
# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================
**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.
Main Features
-------------
Here are just a few of the things that pandas does well:
- Easy handling of missing data in floating point as well as non-floating
point data.
- Size mutability: columns can be inserted and deleted from DataFrame and
higher dimensional objects
- Automatic and explicit data alignment: objects can be explicitly aligned
to a set of labels, or the user can simply ignore the labels and let
`Series`, `DataFrame`, etc. automatically align the data for you in
computations.
- Powerful, flexible group by functionality to perform split-apply-combine
operations on data sets, for both aggregating and transforming data.
- Make it easy to convert ragged, differently-indexed data in other Python
and NumPy data structures into DataFrame objects.
- Intelligent label-based slicing, fancy indexing, and subsetting of large
data sets.
- Intuitive merging and joining data sets.
- Flexible reshaping and pivoting of data sets.
- Hierarchical labeling of axes (possible to have multiple labels per tick).
- Robust IO tools for loading data from flat files (CSV and delimited),
Excel files, databases, and saving/loading data from the ultrafast HDF5
format.
- Time series-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.
"""
# Use __all__ to let type checkers know what is part of the public API.
# Pandas is not (yet) a py.typed library: the public API is determined
# based on the documentation.
__all__ = [
"ArrowDtype",
"BooleanDtype",
"Categorical",
"CategoricalDtype",
"CategoricalIndex",
"DataFrame",
"DateOffset",
"DatetimeIndex",
"DatetimeTZDtype",
"ExcelFile",
"ExcelWriter",
"Flags",
"Float32Dtype",
"Float64Dtype",
"Grouper",
"HDFStore",
"Index",
"IndexSlice",
"Int16Dtype",
"Int32Dtype",
"Int64Dtype",
"Int8Dtype",
"Interval",
"IntervalDtype",
"IntervalIndex",
"MultiIndex",
"NA",
"NaT",
"NamedAgg",
"Period",
"PeriodDtype",
"PeriodIndex",
"RangeIndex",
"Series",
"SparseDtype",
"StringDtype",
"Timedelta",
"TimedeltaIndex",
"Timestamp",
"UInt16Dtype",
"UInt32Dtype",
"UInt64Dtype",
"UInt8Dtype",
"api",
"array",
"arrays",
"bdate_range",
"concat",
"crosstab",
"cut",
"date_range",
"describe_option",
"errors",
"eval",
"factorize",
"get_dummies",
"from_dummies",
"get_option",
"infer_freq",
"interval_range",
"io",
"isna",
"isnull",
"json_normalize",
"lreshape",
"melt",
"merge",
"merge_asof",
"merge_ordered",
"notna",
"notnull",
"offsets",
"option_context",
"options",
"period_range",
"pivot",
"pivot_table",
"plotting",
"qcut",
"read_clipboard",
"read_csv",
"read_excel",
"read_feather",
"read_fwf",
"read_gbq",
"read_hdf",
"read_html",
"read_json",
"read_orc",
"read_parquet",
"read_pickle",
"read_sas",
"read_spss",
"read_sql",
"read_sql_query",
"read_sql_table",
"read_stata",
"read_table",
"read_xml",
"reset_option",
"set_eng_float_format",
"set_option",
"show_versions",
"test",
"testing",
"timedelta_range",
"to_datetime",
"to_numeric",
"to_pickle",
"to_timedelta",
"tseries",
"unique",
"value_counts",
"wide_to_long",
]
最後に...結局 __init__.py があったりなかったりするのも紛らわしく、混乱しそうなので、今まで通り「とりあえず __init__.py を置いておけ」でよいのだろうと思いました。
組み込みの名前空間パッケージ
Python 3.3 では、PEP 420 により暗黙の名前空間パッケージが追加されました。ネイティブな名前空間パッケージを作成するのに必要なことは、名前空間パッケージのディレクトリから __init__.py を取り除くことだけです。
Discussion
__init__.py の有無は意味として異なるため、必須ではありませんが不要でもありません。__init__.py がないとき、そのパッケージは PEP 420 に従って名前空間パッケージとなります。名前空間パッケージは、簡単に言えば、別々に配布される複数のライブラリを同じパッケージ名の下でインポートできるようにするために用いるもので、__init__.py を省略していないとき(通常のパッケージ)とは異なる動作をします。(そのため私は、名前空間パッケージとしての動作が必要なときのみ __init__.py を省略するほうが良いと思います。)なお、私の知る名前空間パッケージの利用例として、discord.py が拡張機能のために用意している discord.ext があります。discord-ext-menus はこれを利用して、discord.ext.menus からインポートできるようになっています。
存在の有無には意味があります。
__init__.py がなくても import 自体は動作しますが、名前空間の探索処理や解決速度には違いが生じます。