"""Instances of Data for common data payloads."""
import json
import os
from ..api.data import Data
class PandasData(Data):
    """Data type for Pandas DataFrames and Series.

    Payloads are written and read with pandas' msgpack support
    (``to_msgpack`` / ``read_msgpack``).
    """

    def __init__(self, *args, **kwargs):
        """Construct PandasData, forwarding every argument to ``Data``."""
        Data.__init__(self, *args, **kwargs)
        # NOTE(review): there is no ``format`` parameter in this scope, so this
        # binds the *builtin* ``format`` function to the instance. Left as-is
        # so the attribute keeps existing for any reader -- confirm the intent.
        self.format = format

    @staticmethod
    def serialize(data):
        """Return the msgpack encoding of ``data`` as base64 text.

        :param data: object exposing ``to_msgpack()`` (a DataFrame or Series).
        :returns: the base64 representation as a :class:`str`.
        """
        import base64

        # ``b64encode`` returns bytes. Calling ``str()`` on bytes under
        # Python 3 yields the repr ("b'...'") rather than the encoded text,
        # so decode explicitly; base64 output is always plain ASCII.
        return base64.b64encode(data.to_msgpack()).decode("ascii")

    def read(self, filename):
        """Read a msgpack file and store the result via ``set_data``.

        :param filename: path handed to ``pandas.read_msgpack``.
        """
        import pandas as pd

        df = pd.read_msgpack(filename)
        self.set_data(df, cache=False)

    @staticmethod
    def check_type(value):
        """Return True when ``value`` is a DataFrame or a Series."""
        import pandas as pd

        return isinstance(value, (pd.DataFrame, pd.Series))

    def write(self, filename):
        """Write ``self.data`` to ``filename`` as msgpack."""
        self.data.to_msgpack(filename)
class JSONData(Data):
    """
    A Data class for primitive JSON serializable types.

    The so-called "primitive types" in JSON are:

    - string
    - numeric types
    - object (in python this is a :class:`dict`)
    - array
    - boolean
    - null

    In this class, hierarchies of the following types are supported:

    - :class:`bool`
    - :class:`dict`
    - :class:`float`
    - :class:`int`
    - :class:`list`
    - :obj:`None`
    - :class:`str`

    Note that although other types than these may be serializable in Python (by
    subclassing :class:`json.JSONEncoder`), the primitive types can be
    serialized/deserialized unambiguously. For example, we do not support
    tuples, although the :mod:`json` module supports serializing them by casting
    them to lists.
    """

    @staticmethod
    def serialize(data):
        """Convert ``data`` to JSON text with keys sorted.

        :param data: JSON-serializable value.
        :returns: the JSON document as a :class:`str`.
        """
        return json.dumps(data, sort_keys=True)

    def read(self, filename):
        """Read JSON text from ``filename`` and store it via ``set_data``.

        :param filename: path of the JSON file to load.
        """
        # Context manager guarantees the handle is closed even when parsing
        # fails, instead of leaving it to the garbage collector.
        with open(filename, "r") as handle:
            payload = json.load(handle)
        self.set_data(payload, cache=False)

    def write(self, filename):
        """Write ``self.data`` to ``filename`` as JSON text with sorted keys.

        :param filename: path of the JSON file to create or overwrite.
        """
        # Fetch the payload before opening so that a failing ``self.data``
        # lookup cannot leave behind a truncated file.
        payload = self.data
        # Closing the file deterministically flushes the output to disk.
        with open(filename, "w") as handle:
            json.dump(payload, handle, sort_keys=True)

    @staticmethod
    def check_type(value):
        """Check that value is one of the accepted top-level JSON types.

        Only the type of ``value`` itself is inspected; nested elements are
        not visited.

        NOTE(review): ``None`` is listed as supported in the class docstring
        but is not accepted by this check -- confirm which one is intended.
        """
        return isinstance(value, (int, float, list, dict, str))
class KerasModelData(Data):
    """Data wrapper whose payload is a Keras model."""

    @staticmethod
    def check_type(value):
        """Return True when ``value`` is an instance of ``keras.models.Model``."""
        from keras.models import Model as KerasModel

        is_keras_model = isinstance(value, KerasModel)
        return is_keras_model

    def read(self, filename):
        """Load a model with ``keras.models.load_model`` into ``self.data``.

        The original documentation describes the on-disk format as HDF5.

        :param filename: path handed to ``load_model``.
        """
        from keras.models import load_model as restore_model

        self.data = restore_model(filename)

    def write(self, filename):
        """Save ``self.data`` to ``filename`` through the model's ``save``.

        The original documentation describes the on-disk format as HDF5.

        :param filename: destination path handed to ``save``.
        """
        model = self.data
        model.save(filename)
class TorchModelData(Data):
    """Data wrapper whose payload is a PyTorch module."""

    @staticmethod
    def check_type(value):
        """Return True when ``value`` is an instance of ``torch.nn.Module``."""
        from torch.nn import Module as TorchModule

        is_torch_module = isinstance(value, TorchModule)
        return is_torch_module

    def read(self, filename):
        """Assign the object returned by ``torch.load(filename)`` to ``self.data``.

        :param filename: path handed to ``torch.load``.
        """
        from torch import load as torch_load

        # NOTE: ``torch.load`` is pickle-based, so only files from a trusted
        # origin should be read through this method.
        self.data = torch_load(filename)

    def write(self, filename):
        """Persist ``self.data`` to ``filename`` with ``torch.save``.

        :param filename: destination path handed to ``torch.save``.
        """
        from torch import save as torch_save

        module = self.data
        torch_save(module, filename)
class FileData(Data):
    """
    Base class for any disk-native data.

    The payload is a filename rather than an in-memory object. For example,
    SQLiteData will use this as a base class.
    """

    @staticmethod
    def check_type(value):
        """Return True when ``value`` is a string naming an existing path."""
        if not isinstance(value, str):
            return False
        return os.path.exists(value)

    @staticmethod
    def serialize(data):
        """Return ``data`` (the filename) unchanged."""
        return data

    def read(self, filename):
        """Record ``filename`` as the payload via ``set_data``."""
        self.set_data(filename)

    def write(self, filename):
        """Copy the file at ``cache_new_location()`` to ``filename``.

        Nothing is copied when ``filename`` already equals that location.
        """
        import shutil

        # Target is already the location reported by the cache: nothing to do.
        if filename == self.cache_new_location():
            return
        shutil.copyfile(self.cache_new_location(), filename)