"""Module implementing abstract Data class."""
from abc import abstractmethod, abstractstaticmethod
from .cache import Cached, Hashable
from .graph import Node
class Data(Cached, Hashable, Node):
    """
    Data placeholder class.

    This class represents data that has either not yet been computed, or is
    not yet fully specified. Classes inheriting :class:`~.data.Data`
    implement placeholders for specific data types, e.g. Pandas dataframes
    or numpy arrays.

    Subclasses of :class:`~.data.Data` are typically instantiated by
    invocations of :class:`~.tool.Tool`.
    Thus :class:`~.data.Data` and :class:`~.invocation.Invocation` are
    connected and form the backbone of the computational graph, with
    :class:`~.tool.Tool` objects connected to
    :class:`~.invocation.Invocation` as objects that can be configured.

    Note that the `provider` attribute, itself an
    :class:`~.invocation.Invocation`, can be "partial", in which case the
    data object itself is callable. When called, arguments are passed to the
    provider which will create new invocations; potentially now non-partial
    ones.
    """

    def __init__(self, desc=None, name=None):
        """
        Construct a placeholder data object.

        The object starts out unpopulated and without a provider.

        :param desc: a plain-text description of this data object
        :param name: a short-hand name for this data object
        """
        self.desc = desc
        self._data = None
        self._populated = False
        self._missing_args = {}
        self.name = name
        self.provider = None
        self.provider_slot = None

    def __repr__(self):
        """Represent data including provider and name."""
        # repr(None) and f"{None}" both render as "None", so an unset
        # provider or name needs no special-casing.
        return (f"{self.__class__.__name__}(name={self.name}, "
                f"_populated={self._populated!r}, "
                f"_data={self._data!r}, "
                f"provider={self.provider!r})")

    def __call__(self, *args):
        """
        Enable calling for placeholder Data objects.

        Without a provider, exactly one argument is accepted: ``None`` keeps
        this object as the placeholder, while an instance of this object's
        type replaces it. With a provider, the arguments are forwarded to
        ``provider.invoke`` and the output in this object's
        ``provider_slot`` is returned.

        :param args: arguments used to fill in this placeholder
        :return: this object, the given replacement, or the matching output
            of the new invocation
        :raises ValueError: if this object is already populated, or if a
            provider-less placeholder is not called with exactly one argument
        :raises TypeError: if a provider-less placeholder is called with an
            object that is not an instance of its own type
        """
        if self._populated:
            raise ValueError("Cannot invoke populated Data object")

        if self.provider is None:
            if len(args) != 1:
                raise ValueError(
                    "Can only invoke placeholder Data with single argument"
                )
            if args[0] is None:  # This arg should remain a placeholder
                return self
            if not isinstance(args[0], type(self)):
                raise TypeError(
                    "Cannot invoke placeholder type t with any type other than t"
                )
            return args[0]

        # if there's a provider, then invoke with these args
        outps = self.provider.invoke(*args).o
        if isinstance(outps, tuple):
            # Several outputs: pick the one this object corresponds to.
            assert self.provider_slot < len(outps)
            return outps[self.provider_slot]
        # A single output is only consistent with slot 0.
        assert self.provider_slot == 0
        return outps

    def missing_args(self):
        """
        Count number of missing arguments.

        :return: 0 if populated, 1 for a provider-less placeholder, otherwise
            whatever ``provider.missing_args()`` reports
        """
        if self._populated:
            return 0
        if self.provider is None:
            return 1  # This is a placeholder
        return self.provider.missing_args()

    def get_hash(self):
        """
        Return hash of provider if exists, or of data itself for constants.

        The hash is the MD5 hex digest of the class name concatenated with
        either the provider's hash, the serialized data, or the marker
        ``"NODATA"`` when there is neither a provider nor data.

        :return: hexadecimal digest string
        """
        import hashlib

        hashbase = self.__class__.__name__  # + self.version()
        if self.provider is None:
            # Read the raw attribute rather than the ``data`` property: the
            # property getter raises for an unpopulated object without a
            # provider, which would make the "NODATA" case unreachable for
            # plain placeholders.
            if self._data is None:
                hashbase += "NODATA"
            else:
                hashbase += self.serialize(self._data)
        else:
            hashbase += self.provider.get_hash()
        return hashlib.md5(hashbase.encode("utf-8")).hexdigest()

    @classmethod
    def constant(cls, val, name="constant"):
        """
        Create a constant from appropriately typed variable.

        :param val: the value to store; it must pass :meth:`check_type`
        :param name: a short-hand name for the new data object
        :return: a populated instance of ``cls`` without a provider
        :raises ValueError: if ``val`` is rejected by :meth:`check_type`
        """
        ob = cls(name=name)
        ob.data = val
        return ob

    @staticmethod
    @abstractmethod
    def serialize(data):
        """
        Convert data to string.

        :param data: the raw data held by an instance
        :return: string form of ``data``; :meth:`get_hash` concatenates it
            into the text that is hashed
        """
        return NotImplemented

    def get_data(self):
        """
        Getter for data attribute.

        If the object is not populated yet, the provider is asked to
        populate it first.

        :return: the stored data
        :raises Exception: if the object is unpopulated and has no provider
        """
        if not self._populated:
            if self.provider is None:
                raise Exception("Cannot populate data without provider")
            self.provider.populate()
        return self._data

    @staticmethod
    @abstractmethod
    def check_type(value):
        """
        Guard value to ensure it is of proper type.

        :param value: candidate value for the data attribute
        :return: a truthy value if ``value`` is acceptable; a falsy result
            makes :meth:`set_data` raise
        """

    def set_data(self, value, cache=True):
        """
        Setter for data attribute.

        :param value: the data to store; it must pass :meth:`check_type`
        :param cache: if true and a provider is set, call ``cache_write()``
            after storing
        :raises ValueError: if ``value`` is rejected by :meth:`check_type`
        """
        if not self.check_type(value):
            raise ValueError(
                f"Cannot set value of {self.__class__.__name__} "
                f"with object of type {type(value)}"
            )
        self._data = value
        self._populated = True
        if cache and self.provider is not None:
            self.cache_write()

    # Bound to the functions defined on this class.
    data = property(get_data, set_data)

    @abstractmethod
    def read(self, filename):
        """
        Read data from disk.

        The base body only hands ``NotImplemented`` to :meth:`set_data`
        with caching disabled.

        :param filename: path of the file to read from
        """
        self.set_data(NotImplemented, cache=False)

    @abstractmethod
    def write(self, filename):
        """
        Write data to disk.

        :param filename: path of the file to write to
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()

    def parents(self):
        """
        Return provider as only parent if it is set.

        :return: ``[provider]``, or an empty list when there is no provider
        """
        if self.provider is None:
            return []
        return [self.provider]