# coding: utf-8
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.
"""Interface for classes to serialize to dictionary.
This also contains classes to ease the transition from HDF based storage to
dict based serialization. Roughly we want to proceed as follows:
1. Any new object directly implements :class:`.HasDict` and derives from
:class:`.HasHDFfromDict` to be compatible to older code that still relies
on the HDF interface.
2. Any old object that doesn't yet directly implements :class:`.HasDict`
can trivially derive from :class:`.HasDictfromHDF` to be compatible to
newer code that uses the dict based serialization.
3. Step by step we can transition old objects to directly implement
:class:`.HasDict`.
"""
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Any
from pyiron_base.interfaces.has_hdf import HasHDF
from pyiron_base.storage.hdfio import (
DummyHDFio,
_extract_module_class_name,
_import_class,
)
__author__ = "Jan Janssen"
__copyright__ = (
"Copyright 2023, Max-Planck-Institut für Eisenforschung GmbH - "
"Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Jan Janssen"
__email__ = "janssen@mpie.de"
__status__ = "production"
__date__ = "Dec 20, 2023"
[docs]
def create_from_dict(obj_dict):
"""
Create and restores an object previously written as a dictionary.
Args:
obj_dict (dict): must be the output of HasDict.to_dict()
Returns:
object: restored object
"""
if "TYPE" not in obj_dict:
raise ValueError(
"invalid obj_dict! must contain type information and be the output of HasDict.to_dict!"
)
type_field = obj_dict["TYPE"]
module_path, class_name = _extract_module_class_name(type_field)
class_object = _import_class(module_path, class_name)
version = obj_dict.get("DICT_VERSION", None)
obj = class_object.instantiate(obj_dict, version)
obj.from_dict(obj_dict, version)
return obj
def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
"""
Given a nested dictionary, flatten the first level.
>>> d = {'a': {'a1': 3}, 'b': {'b1': 4, 'b2': {'c': 42}}}
>>> _join_children_dict(d)
{'a/a1': 3, 'b/b1': 4, 'b/b2': {'c': 42}}
This is intended as a utility function for nested HasDict objects, that
to_dict their children and then want to give a flattened dict for
writing to ProjectHDFio.write_dict_to_hdf.
See also :func:`._split_children_dict`.
"""
return {
"/".join((k1, k2)): v2 for k1, v1 in children.items() for k2, v2 in v1.items()
}
def _split_children_dict(obj_dict: dict[str, Any]) -> dict[str, Any | dict[str, Any]]:
"""
Undoes _join_children_dict.
Classes that use :func:`._join_children_dict` in their `_to_dict`, must
call this function in their `_from_dict`.
"""
subs = defaultdict(dict)
plain = {}
for k, v in obj_dict.items():
if "/" not in k:
plain[k] = v
continue
root, k = k.split("/", maxsplit=1)
subs[root][k] = v
# using update keeps type stability, i.e. we always return a plain dict
plain.update(subs)
return plain
def _from_dict_children(obj_dict: dict) -> dict:
"""
Recurse through `obj_dict` and restore any objects with :class:`~.HasDict`.
Args:
obj_dict (dict): data previously returned from :meth:`.to_dict`
"""
def load(inner_dict):
# object is a not a dict, so nothing to do
if not isinstance(inner_dict, dict):
return inner_dict
# if object is a dict but doesn't have type information, recurse through it to load any sub dicts that might
if not all(k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION")):
return {k: load(v) for k, v in inner_dict.items()}
# object has type info, so just load it
return create_from_dict(inner_dict)
return {k: load(v) for k, v in obj_dict.items()}
def _to_dict_children(obj_dict: dict) -> dict:
"""
Call to_dict on any objects in the values that support it.
Intended as a helper function for recursives object that want to to_dict
their nested objects automatically. It uses :func:`._join_children_dict`
for any dictionaries returned from the children.
The function only goes through the *first* layer of dictionary values and
does *not* recurse through nested dictionaries.
Args:
obj_dict (dict): data previously returned from :meth:`._to_dict`
Returns:
obj_dict (dict): new dictionary with the obj_dict of the children
"""
data_dict = {}
child_dict = {}
for k, v in obj_dict.items():
if isinstance(v, HasDict):
child_dict[k] = v.to_dict()
elif isinstance(v, HasHDF):
child_dict[k] = HasDictfromHDF.to_dict(v)
else:
data_dict[k] = v
return data_dict | _join_children_dict(child_dict)
[docs]
class HasDict(ABC):
"""
Abstract interface to convert objects to dictionaries for storage.
Subclasses must to implement :meth:`~._from_dict` and :meth:`~._to_dict` and may implement :meth:`.instantiate`.
:meth:`._to_dict` is excepted to return a `dict` mapping string names to the values the object needs serialized.
On recreating an object from scratch with :func:`.create_from_dict` first :meth:`.instantiate` is called and then
:meth:`.from_dict` with the same `obj_dict`, i.e. it is roughly equivalent to
>>> my_dict = dict(...)
>>> my_object = MyType.instantiate(my_dict)
>>> my_object.from_dict(my_dict)
Implementations should make sure that calling `to_dict` after `from_dict` returns an equivalent dictionary even when
the object was not obtained from :meth:`.instantiate`, such that
>>> my_dict = dict(...)
>>> my_object = MyType(...)
>>> my_object.from_dict(my_dict)
>>> my_object.to_dict() == my_dict
True
"""
__dict_version__ = "0.1.0"
"""A version string saved together with data returned from :meth:`._to_dict` and is passed back into
:meth:`._from_dict`. Implementations can use this change their representation and still read older data."""
[docs]
@classmethod
def instantiate(cls, obj_dict: dict, version: str = None) -> "Self":
"""
Create a blank instance of this class.
This can be used when some values are already necessary for the objects `__init__`.
Args:
obj_dict (dict): data previously returned from :meth:`.to_dict`
version (str): version tag written together with the data
Returns:
object: a blank instance of the object that is sufficiently initialized to call :meth:`._from_dict` on it
"""
return cls()
[docs]
def from_dict(self, obj_dict: dict, version: str = None):
"""
Populate the object from the serialized object.
Args:
obj_dict (dict): data previously returned from :meth:`.to_dict`
version (str): version tag written together with the data
"""
obj_dict = _split_children_dict(obj_dict)
if version is None:
version = obj_dict.get("DICT_VERSION", None)
self._from_dict(obj_dict, version)
@abstractmethod
def _from_dict(self, obj_dict: dict, version: str = None):
"""
Populate the object from the serialized object.
Implementations must use :func:`._from_dict_children` if they use
`._to_dict_children` in their implementation of :meth:`._to_dict`.
Args:
obj_dict (dict): data previously returned from :meth:`.to_dict`
version (str): version tag written together with the data
"""
pass
[docs]
def to_dict(self) -> dict:
"""
Reduce the object to a dictionary.
Returns:
dict: serialized state of this object
"""
type_dict = self._type_to_dict()
return self._to_dict() | type_dict
@abstractmethod
def _to_dict(self) -> dict:
"""
Reduce the object to a dictionary.
Implementations may use :func:`._to_dict_children`, if they
automatically want to call `to_dict` on any objects possible from their
returned dictionary.
Returns:
dict: serialized state of this object
"""
pass
def _type_to_dict(self):
# Needed for the HasDictfromHDF/HasHDFfromDict classes. When an object
# derives from from both them and HasHDF/HasDict it will generally need
# HDF_VERSION and DICT_VERSION defined for the version checking inside
# from_dict/from_hdf to work properly. So the code below tries to
# escalate to super in case this is the case and falls back to {} if it
# is not
try:
type_dict = super()._type_to_dict()
except AttributeError:
type_dict = {}
type_dict |= {
"NAME": self.__class__.__name__,
"TYPE": str(type(self)),
"OBJECT": self.__class__.__name__, # unused alias
"DICT_VERSION": self.__dict_version__,
}
if hasattr(self, "__version__"):
type_dict["VERSION"] = self.__version__
return type_dict
[docs]
class HasHDFfromDict(HasHDF, HasDict):
"""
Implements HasHDF in terms of HasDict.
This class is intended for "new-style" objects that are used in a context
that only assumes that they implements HasHDF.
Implementors may still override :meth:`.HasHDF._get_hdf_group_name`.
"""
def _from_hdf(self, hdf, version=None):
self.from_dict(hdf.read_dict_from_hdf(recursive=True))
def _to_hdf(self, hdf):
hdf.write_dict_to_hdf(self.to_dict())
[docs]
class HasDictfromHDF(HasDict, HasHDF):
"""
Implements HasDict in terms of HasHDF.
This class is intended for "old-style" objects that should be able to be
used as children for objects that already implement HasDict and expect
their children to implmement it.
"""
[docs]
@classmethod
def instantiate(cls, obj_dict: dict, version: str = None) -> "Self":
hdf = DummyHDFio(None, "/", obj_dict)
return cls(**cls.from_hdf_args(hdf))
def _from_dict(self, obj_dict: dict, version: str = None):
# DummyHDFio(project=None) looks a bit weird, but it was added there
# only to support saving/loading jobs which already use the HasDict
# interface
group_name = self._get_hdf_group_name()
if group_name is not None:
hdf = DummyHDFio(None, "/", {group_name: obj_dict})
else:
hdf = DummyHDFio(None, "/", obj_dict)
self.from_hdf(hdf)
def _to_dict(self):
hdf = DummyHDFio(None, "/")
self.to_hdf(hdf)
group_name = self._get_hdf_group_name()
data = hdf.to_dict()
if group_name is not None:
return data[group_name]
else:
return data