Source code for dcbench.common.table

import copy
from dataclasses import dataclass
from itertools import chain
from typing import Dict, Iterator, Mapping, Optional, Sequence, Union

import pandas as pd

# Scalar value types that a single row attribute may hold; used below as the
# value type of the ``Mapping[str, Attribute]`` attribute dictionaries.
Attribute = Union[int, float, str, bool]


@dataclass
class AttributeSpec:
    """Declaration of one named attribute that a row class may carry.

    Instances are looked up by attribute name in a row class's
    ``attribute_specs`` mapping when attributes are validated.
    """

    # Human-readable explanation of what the attribute represents.
    description: str
    # Type that values of this attribute are checked against with isinstance().
    attribute_type: type
    # When True, validation does not require the attribute to be present.
    optional: bool = False
class RowMixin:
    """Base for objects that appear as a row: an ``id`` plus named attributes.

    Subclasses declare the attributes they accept through the class-level
    ``attribute_specs`` mapping. Attributes handed to the constructor are
    stored as given; only assignment through the ``attributes`` property is
    validated against ``attribute_specs``.
    """

    attribute_specs: Mapping[str, AttributeSpec]

    def __init__(self, id: str, attributes: Mapping[str, Attribute] = None):
        self.id = id
        # Stored without validation (unlike the property setter below).
        self._attributes = attributes

    @property
    def attributes(self) -> Optional[Mapping[str, Attribute]]:
        """The row's attribute mapping, or ``None`` if none has been set."""
        return self._attributes

    @attributes.setter
    def attributes(self, value: Mapping[str, Attribute]):
        # Validate first so that a rejected value never replaces the old one.
        self._check_attribute_specs(value)
        self._attributes = value

    @classmethod
    def _check_attribute_specs(cls, attributes: Mapping[str, Attribute]):
        """Validate ``attributes`` against ``cls.attribute_specs``.

        Raises:
            ValueError: if a passed name is not declared, if a value is not
                an instance of the declared type, or if a non-optional
                attribute is absent.
        """
        # Pass 1: every supplied attribute must be declared and well-typed.
        for name, value in attributes.items():
            if name not in cls.attribute_specs:
                raise ValueError(
                    f"Passed attribute name '{name}', but the specification for"
                    f" {cls.__name__} doesn't include it."
                )
            expected_type = cls.attribute_specs[name].attribute_type
            if isinstance(value, expected_type):
                continue
            raise ValueError(
                f"Passed an attribute of type {type(value)} to {cls.__name__}"
                f" for the attribute named '{name}'. The specification for"
                f" {cls.__name__} expects an attribute of type"
                f" {expected_type}."
            )

        # Pass 2: every declared, non-optional attribute must be supplied.
        for name, spec in cls.attribute_specs.items():
            if not spec.optional and name not in attributes:
                raise ValueError(
                    f"Must pass required attribute with key {name} to {cls.__name__}."
                )
class RowUnion(RowMixin):
    """A row whose attributes are the merged attributes of several rows.

    Args:
        id: Identifier of the combined row.
        elements: Rows to merge. When the same attribute name occurs in more
            than one element, the value from the element that comes first in
            ``elements`` is kept.
    """

    def __init__(self, id: str, elements: Sequence[RowMixin]):
        self._elements = elements
        merged: Dict[str, Attribute] = {}
        # Walk back-to-front: ``dict.update`` overwrites, so the earliest
        # element is applied last and therefore wins on duplicate names.
        for element in reversed(elements):
            # A row may legitimately have no attributes (``None`` is the
            # RowMixin default); ``update(None)`` would raise TypeError.
            if element.attributes is not None:
                merged.update(element.attributes)
        super().__init__(id, attributes=merged)
def predicate(a: Attribute, b: Union[Attribute, slice, Sequence[Attribute]]) -> bool:
    """Decide whether attribute value ``a`` satisfies filter value ``b``.

    Args:
        a: The attribute value being tested. ``None`` stands for a missing
            attribute.
        b: The filter. One of:

            * a ``slice`` -- matches when ``start <= a < stop``; a bound that
              is ``None`` is treated as unbounded on that side. The slice
              ``step`` is ignored.
            * a non-string sequence -- matches when ``a`` is one of its items.
            * any other value (including strings) -- matches when ``a == b``.

    Returns:
        ``True`` if ``a`` matches ``b``, otherwise ``False``.
    """
    if isinstance(b, slice):
        # A missing attribute can never fall inside a range; comparing None
        # with the bounds would raise TypeError.
        if a is None:
            return False
        return (b.start is None or a >= b.start) and (
            b.stop is None or a < b.stop
        )
    # Strings are Sequences too, but as a filter they are scalar attribute
    # values: compare them by equality, not by substring membership.
    if isinstance(b, Sequence) and not isinstance(b, (str, bytes)):
        return a in b
    return a == b
class Table(Mapping[str, RowMixin]):
    """Mapping from row id to row, with pandas-backed query helpers.

    Args:
        data: Rows to store. Rows are keyed by their ``id``; if two rows
            share an id, the later one replaces the earlier one.
    """

    def __init__(self, data: Sequence[RowMixin]):
        self._data = {item.id: item for item in data}

    def __getitem__(self, k: str) -> RowMixin:
        """Return the row with id ``k``.

        Raises:
            KeyError: if no row has that id (the error carries ``k``).
        """
        # Plain dict indexing raises KeyError(k), so the missing id shows up
        # in the error instead of an empty KeyError().
        return self._data[k]

    def __iter__(self) -> Iterator[str]:
        """Iterate over the row ids."""
        return iter(self._data)

    def __len__(self) -> int:
        """Return the number of rows."""
        return len(self._data)

    def _add_row(self, row: RowMixin) -> None:
        """Insert ``row`` in place, replacing any row with the same id."""
        self._data[row.id] = row

    @property
    def df(self):
        """The table as a DataFrame built from each row's attributes.

        Row ids are used as the index (``orient="index"``).
        """
        return pd.DataFrame.from_dict(
            {k: v.attributes for k, v in self._data.items()}, orient="index"
        )

    def where(self, **kwargs: Union[Attribute, slice, Sequence[Attribute]]) -> "Table":
        """Return a new table holding only the rows that match every filter.

        Args:
            **kwargs: ``attribute_name=filter`` pairs, each evaluated with
                ``predicate``. An attribute a row does not have is passed to
                ``predicate`` as ``None``.

        Returns:
            A table of the same type as ``self`` with the matching rows.
        """
        result_data = []
        for item in self._data.values():
            # Rows created without attributes hold None; treat that as
            # "no attributes" rather than crashing on ``None.get``.
            attributes = item.attributes if item.attributes is not None else {}
            if all(predicate(attributes.get(k, None), v) for k, v in kwargs.items()):
                result_data.append(item)
        return type(self)(result_data)

    def average(
        self, *targets: str, groupby: Optional[Sequence[str]] = None, std: bool = False
    ) -> "Table":
        """Average the ``targets`` columns, optionally per group.

        Args:
            *targets: Names of the attributes to average.
            groupby: Attribute names to group by before averaging. ``None``
                or empty means a single average over all rows. A single name
                may be given as a plain string.
            std: When True, also compute the standard deviation of every
                target and store it under ``"<target>:std"``.

        Returns:
            A ``Table`` of ``RowMixin`` rows, one per group (or exactly one
            when not grouping). Row ids are the stringified positions
            ``"0"``, ``"1"``, ...; attributes are the group keys (when
            grouping), the means, and optionally the standard deviations.
        """
        if groupby is None:
            groupby = []
        elif isinstance(groupby, str):
            # A bare string would otherwise be iterated character by character.
            groupby = [groupby]
        else:
            groupby = list(groupby)

        # Select columns with a concrete list rather than a one-shot iterator.
        df = self.df[list(chain(targets, groupby))]
        if groupby:
            df = df.groupby(groupby)

        df_result = df.mean()
        if isinstance(df_result, pd.Series):
            # Ungrouped mean is a Series; turn it into a one-row frame.
            df_result = df_result.to_frame().T

        if std:
            df_std = df.std()
            if isinstance(df_std, pd.Series):
                df_std = df_std.to_frame().T
            df_result = pd.merge(
                df_result,
                df_std,
                left_index=True,
                right_index=True,
                suffixes=("", ":std"),
            )

        # When grouping, the index holds the group keys and must become
        # columns. When not grouping, the index is a meaningless 0 and is
        # dropped so it does not leak into the result as an "index" attribute.
        df_result = df_result.reset_index(drop=not groupby)

        # Store plain dicts so row attributes really are mappings.
        result_rows = [
            RowMixin(id=str(row_id), attributes=row.to_dict())
            for row_id, row in df_result.iterrows()
        ]
        return Table(result_rows)

    def __repr__(self) -> str:
        return repr(self.df)

    def _repr_html_(self) -> Optional[str]:
        # Hook used for rich display; delegates to the DataFrame rendering.
        return self.df._repr_html_()

    def __add__(self, other: RowMixin) -> "Table":
        """Return a deep copy of this table with ``other`` added as a row."""
        result = copy.deepcopy(self)
        result._add_row(other)
        return result

    def __iadd__(self, other: RowMixin) -> "Table":
        """Add ``other`` as a row in place and return ``self``."""
        self._add_row(other)
        return self