Source code for delayedarray.SparseNdarray

from bisect import bisect_left
from typing import Callable, List, Optional, Sequence, Tuple, Union
from collections import namedtuple
import numpy
import copy

from ._isometric import translate_ufunc_to_op_simple, translate_ufunc_to_op_with_args, ISOMETRIC_OP_WITH_ARGS, _choose_operator, _infer_along_with_args
from ._subset import _spawn_indices, _getitem_subset_preserves_dimensions, _getitem_subset_discards_dimensions, _repr_subset
from ._mask import (
    _allocate_unmasked_ndarray, 
    _allocate_maybe_masked_ndarray, 
    _convert_to_unmasked_1darray, 
    _convert_to_maybe_masked_1darray,
    _concatenate_unmasked_ndarrays,
    _concatenate_maybe_masked_ndarrays
)
from ._statistics import array_mean, array_var, array_sum, _create_offset_multipliers, array_all, array_any

__author__ = "ltla"
__copyright__ = "ltla"
__license__ = "MIT"


[docs] class SparseNdarray: """ The ``SparseNdarray``, as its name suggests, is a sparse n-dimensional array. It is inspired by the ``SVT_Array`` class from the `DelayedArray R/Bioconductor package <https://bioconductor.org/packages/DelayedArray>`_. Internally, the ``SparseNdarray`` is represented as a nested list where each nesting level corresponds to a dimension in reverse order, i.e., the outer-most list corresponds to elements of the last dimension in ``shape``. The list at each level has length equal to the extent of its dimension, where each entry contains another list representing the contents of the corresponding element of that dimension. This recursion continues until the second dimension (i.e., the penultimate nesting level), where each entry instead contains ``(index, value)`` tuples. In effect, this is a tree where the non-leaf nodes are lists and the leaf nodes are tuples. Each ``(index, value)`` tuple represents a sparse vector for the corresponding element of the first dimension of the ``SparseNdarray``. ``index`` should be a :py:class:`~numpy.ndarray` of integers where entries are strictly increasing and less than the extent of the first dimension. All ``index`` objects in the same ``SparseNdarray`` should have the same ``dtype`` (defined by the ``index_dtype`` property). ``value`` may be any numeric/boolean :py:class:`~numpy.ndarray` but the ``dtype`` should be consistent across all ``value`` objects in the ``SparseNdarray``. If the array contains masked values, all ``value`` objects should be a ``MaskedArray``, otherwise they should be regular NumPy arrays. Any entry of any (nested) list may also be None, indicating that the corresponding element of the dimension contains no non-zero values. In fact, the entire tree may be None, indicating that there are no non-zero values in the entire array. For 1-dimensional arrays, the contents should be a single ``(index, value)`` tuple containing the sparse contents. This may also be None if there are no non-zero values in the array. """
[docs] def __init__( self, shape: Tuple[int, ...], contents, dtype: Optional[numpy.dtype] = None, index_dtype: Optional[numpy.dtype] = None, is_masked: Optional[bool] = None, check: bool = True, ): """ Args: shape: Tuple specifying the dimensions of the array. contents: For ``n``-dimensional arrays where ``n`` > 1, a nested list representing a tree where each leaf node is a tuple containing a sparse vector (or None). For 1-dimensional arrays, a tuple containing a sparse vector. Alternatively None, if the array is empty. dtype: NumPy type of the array values. If None, this is inferred from ``contents``. index_dtype: NumPy type of the array indices. If None, this is inferred from ``contents``. is_masked: Whether ``contents`` contains masked values. If None, this is inferred from ``contents``. check: Whether to check the consistency of the ``contents`` during construction. This can be set to False for speed. """ self._shape = shape self._contents = contents ndim = len(shape) if dtype is None or index_dtype is None or is_masked is None: if contents is not None: if ndim > 1: info = _peek(contents, dim=ndim-1) if info is not None: index_dtype0 = info[0] dtype0 = info[1] is_masked0 = info[2] else: dtype0 = None index_dtype0 = None is_masked0 = False else: index_dtype0 = contents[0].dtype dtype0 = contents[1].dtype is_masked0 = numpy.ma.isMaskedArray(contents[1]) if dtype is None: dtype = dtype0 if index_dtype is None: index_dtype = index_dtype0 if is_masked is None: is_masked = is_masked0 if dtype is None: raise ValueError("cannot infer 'dtype' from 'contents'") if index_dtype is None: raise ValueError("cannot infer 'index_dtype' from 'contents'") if is_masked is None: is_masked = False # Sometimes people put the numpy data class instead of the dtype. # It's a common enough mistake that we ought to catch it here. if not isinstance(dtype, numpy.dtype): dtype = numpy.dtype(dtype) if not isinstance(index_dtype, numpy.dtype): index_dtype = numpy.dtype(index_dtype) self._dtype = dtype self._index_dtype = index_dtype self._is_masked = is_masked if check is True and contents is not None: payload = _CheckPayload(dtype=self._dtype, index_dtype=self._index_dtype, is_masked=self._is_masked, max_index=self._shape[0]) if len(shape) > 1: _recursive_check(self._contents, self._shape, payload=payload, dim=ndim-1) else: _check_sparse_tuple(self._contents[0], self._contents[1], payload=payload)
@property def shape(self) -> Tuple[int, ...]: """ Returns: Tuple of integers specifying the extent of each dimension. """ return self._shape @property def dtype(self) -> numpy.dtype: """ Returns: NumPy type of the values. """ return self._dtype @property def index_dtype(self) -> numpy.dtype: """ Returns: NumPy type of the indices. """ return self._index_dtype @property def is_masked(self) -> bool: """ Returns: Whether the values are masked. """ return self._is_masked @property def contents(self): """Contents of the array. This is intended to be read-only and should only be modified if you really know what you're doing. Returns: A nested list, for a n-dimensional array where n > 1. A tuple containing a sparse vector (i.e., indices and values), for a 1-dimensional array. Alternatively None, if the array contains no non-zero elements. """ return self._contents
[docs] def __repr__(self) -> str: """ Pretty-print this ``SparseNdarray``. This uses :py:func:`~numpy.array2string` and responds to all of its options. Returns: String containing a prettified display of the array contents. """ preamble = "<" + " x ".join([str(x) for x in self._shape]) + ">" preamble += " " + type(self).__name__ + " object of type '" + self._dtype.name + "'" indices = _repr_subset(self._shape) bits_and_pieces = _extract_dense_array_from_SparseNdarray(self, indices) converted = numpy.array2string(bits_and_pieces, separator=", ", threshold=0) return preamble + "\n" + converted
# For NumPy:
[docs] def __array__(self, dtype: Optional[numpy.dtype] = None, copy: bool = True) -> numpy.ndarray: """Convert a ``SparseNdarray`` to a NumPy array. Args: dtype: The desired NumPy type of the output array. If None, the type of the seed is used. copy: Currently ignored. The output is never a reference to the underlying seed, even if the seed is another NumPy array. Returns: Dense array of the same type as :py:attr:`~dtype` and shape as :py:attr:`~shape`. """ indices = _spawn_indices(self._shape) return _extract_dense_array_from_SparseNdarray(self, indices, dtype=dtype)
# Assorted dunder methods.
[docs] def __add__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Add something to the right-hand-side of a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the addition. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="add", right=True)
[docs] def __radd__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Add something to the left-hand-side of a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the addition. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="add", right=False)
[docs] def __sub__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Subtract something from the right-hand-side of a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the subtraction. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="subtract", right=True)
[docs] def __rsub__(self, other): """Subtract a ``SparseNdarray`` from something else. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the subtraction. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="subtract", right=False)
[docs] def __mul__(self, other): """Multiply a ``SparseNdarray`` with something on the right hand side. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the multiplication. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="multiply", right=True)
[docs] def __rmul__(self, other): """Multiply a ``SparseNdarray`` with something on the left hand side. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the multiplication. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="multiply", right=False)
[docs] def __truediv__(self, other): """Divide a ``SparseNdarray`` by something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the division. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="divide", right=True)
[docs] def __rtruediv__(self, other): """Divide something by a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the division. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="divide", right=False)
[docs] def __mod__(self, other): """Take the remainder after dividing a ``SparseNdarray`` by something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the modulo. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="remainder", right=True)
[docs] def __rmod__(self, other): """Take the remainder after dividing something by a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the modulo. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray( self, other, operation="remainder", right=False )
[docs] def __floordiv__(self, other): """Divide a ``SparseNdarray`` by something and take the floor. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the floor division. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray( self, other, operation="floor_divide", right=True )
[docs] def __rfloordiv__(self, other): """Divide something by a ``SparseNdarray`` and take the floor. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the floor division. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray( self, other, operation="floor_divide", right=False )
[docs] def __pow__(self, other): """Raise a ``SparseNdarray`` to the power of something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the power operation. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="power", right=True)
[docs] def __rpow__(self, other): """Raise something to the power of the contents of a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the power operation. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="power", right=False)
[docs] def __eq__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check for equality between a ``SparseNdarray`` and something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="equal", right=True)
[docs] def __req__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check for equality between something and a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="equal", right=False)
[docs] def __ne__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check for non-equality between a ``SparseNdarray`` and something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="not_equal", right=True)
[docs] def __rne__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check for non-equality between something and a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="not_equal", right=False)
[docs] def __ge__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether a ``SparseNdarray`` is greater than or equal to something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="greater_equal", right=True)
[docs] def __rge__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether something is greater than or equal to a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray( self, other, operation="greater_equal", right=False )
[docs] def __le__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether a ``SparseNdarray`` is less than or equal to something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray( self, other, operation="less_equal", right=True )
[docs] def __rle__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether something is greater than or equal to a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray( self, other, operation="less_equal", right=False )
[docs] def __gt__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether a ``SparseNdarray`` is greater than something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="greater", right=True)
[docs] def __rgt__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether something is greater than a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="greater", right=False)
[docs] def __lt__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether a ``SparseNdarray`` is less than something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="less", right=True)
[docs] def __rlt__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Check whether something is less than a ``SparseNdarray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or any seed object of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="less", right=False)
# Simple methods.
[docs] def __neg__(self): """Negate the contents of a ``SparseNdarray``. Returns: A ``SparseNdarray`` containing the delayed negation. """ return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, -v), self._dtype)
[docs] def __abs__(self): """Take the absolute value of the contents of a ``SparseNdarray``. Returns: A ``SparseNdarray`` containing the delayed absolute value operation. """ return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, abs(v)), self._dtype)
[docs] def __or__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Element-wise OR with something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="logical_or", right=True)
[docs] def __ror__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Element-wise OR with the right-hand-side of a ``DelayedArray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="logical_or", right=False)
[docs] def __and__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Element-wise AND with something. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="logical_and", right=True)
[docs] def __rand__(self, other) -> Union["SparseNdarray", numpy.ndarray]: """Element-wise AND with the right-hand-side of a ``DelayedArray``. Args: other: A numeric scalar; or a NumPy array with dimensions as described in :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. Returns: Array containing the result of the check. This may or may not be sparse depending on ``other``. """ return _operate_with_args_on_SparseNdarray(self, other, operation="logical_and", right=False)
# Subsetting.
[docs] def __getitem__(self, subset: Tuple[Union[slice, Sequence], ...]) -> Union["SparseNdarray", numpy.ndarray]: """Take a subset of this ``SparseNdarray``. This follows the same logic as NumPy slicing and will generate a :py:class:`~delayedarray.Subset.Subset` object when the subset operation preserves the dimensionality of the seed, i.e., ``args`` is defined using the :py:meth:`~numpy.ix_` function. Args: args: A :py:class:`tuple` of length equal to the dimensionality of this ``SparseNdarray``. Any NumPy slicing is supported but only subsets that preserve dimensionality will generate a delayed subset operation. Raises: ValueError: If ``args`` contain more dimensions than the shape of the array. Returns: If the dimensionality is preserved by ``args``, a ``SparseNdarray`` containing a delayed subset operation is returned. Otherwise, a :py:class:`~numpy.ndarray` is returned containing the realized subset. """ cleaned = _getitem_subset_preserves_dimensions(self.shape, subset) if cleaned is not None: # No need to sanitize here, as the extractors can take unsorted subsets. return _extract_sparse_array_from_SparseNdarray(self, cleaned) return _getitem_subset_discards_dimensions(self, subset, _extract_dense_array_from_SparseNdarray)
# NumPy methods.
[docs] def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> "SparseNdarray": """Interface with NumPy array methods. This is used to implement mathematical operations like NumPy's :py:meth:`~numpy.log`, or to override operations between NumPy class instances and ``SparseNdarray`` objects where the former is on the left hand side. Check out NumPy's ``__array_ufunc__`` `documentation <https://numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__>`_ for more details. Returns: A ``SparseNdarray`` instance containing the requested delayed operation. """ if ufunc.__name__ in translate_ufunc_to_op_with_args or ufunc.__name__ == "true_divide": # This is required to support situations where the NumPy array is on # the LHS, such that the numpy.ndarray method gets called first. op = ufunc.__name__ if ufunc.__name__ == "true_divide": op = "divide" first_is_da = isinstance(inputs[0], SparseNdarray) da = inputs[1 - int(first_is_da)] v = inputs[int(first_is_da)] return _operate_with_args_on_SparseNdarray(self, v, op, right=False) elif ufunc.__name__ in translate_ufunc_to_op_simple: dummy = ufunc(numpy.zeros(1, dtype=self._dtype)) if dummy[0] == 0: return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, ufunc(v)), dummy.dtype) else: return ufunc(self.__array__()) elif ufunc.__name__ == "absolute": return self.__abs__() raise NotImplementedError(f"'{ufunc.__name__}' is not implemented!")
[docs] def __array_function__(self, func, types, args, kwargs) -> "SparseNdarray": """Interface to NumPy's high-level array functions. This is used to implement array operations like NumPy's :py:meth:`~numpy.concatenate`, Check out NumPy's ``__array_function__`` `documentation <https://numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_function__>`_ for more details. Returns: A ``SparseNdarray`` instance containing the requested operation. """ if func == numpy.concatenate: if "axis" in kwargs: axis = kwargs["axis"] else: axis = 0 return _concatenate_SparseNdarrays(args[0], along=axis) if func == numpy.transpose: if "axes" in kwargs: axes = kwargs["axes"] else: axes = list(range(len(self._shape) - 1, -1, -1)) return _transpose_SparseNdarray(self, axes) if func == numpy.round: return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, func(v, **kwargs)), self._dtype) if func == numpy.mean: return self.mean(**kwargs) if func == numpy.sum: return self.sum(**kwargs) if func == numpy.var: return self.var(**kwargs) if func == numpy.any: return self.any(**kwargs) if func == numpy.all: return self.all(**kwargs) raise NotImplementedError(f"'{func.__name__}' is not implemented!")
[docs] def astype(self, dtype: numpy.dtype, **kwargs) -> "SparseNdarray": """See :py:meth:`~numpy.ndarray.astype` for details. All keyword arguments are currently ignored. """ return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, v.astype(dtype)), dtype)
@property def T(self) -> "SparseNdarray": """ Returns: A ``SparseNdarray`` containing the transposed contents. """ axes = list(range(len(self._shape) - 1, -1, -1)) return _transpose_SparseNdarray(self, axes)
[docs] def sum(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None) -> numpy.ndarray: """ Take the sum of values across the ``SparseNdarray``, possibly over a given axis or set of axes. Args: axis: A single integer specifying the axis over which to calculate the sum. Alternatively, a tuple (multiple axes) or None (no axes), see :py:func:`~numpy.sum` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``SparseNdarray``, see :py:func:`~numpy.sum` for details. Returns: A NumPy array containing the summed values. If ``axis = None``, this will be a NumPy scalar instead. """ return array_sum( self, axis=axis, dtype=dtype, reduce_over_x=_reduce_SparseNdarray, masked=self._is_masked, )
[docs] def mean(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None) -> numpy.ndarray: """ Take the mean of values across the ``SparseNdarray``, possibly over a given axis or set of axes. Args: axis: A single integer specifying the axis over which to calculate the mean. Alternatively, a tuple (multiple axes) or None (no axes), see :py:func:`~numpy.mean` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``SparseNdarray``, see :py:func:`~numpy.mean` for details. Returns: A NumPy array containing the mean values. If ``axis = None``, this will be a NumPy scalar instead. """ return array_mean( self, axis=axis, dtype=dtype, reduce_over_x=_reduce_SparseNdarray, masked=self._is_masked, )
[docs] def var(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None, ddof: int = 0) -> numpy.ndarray: """ Take the variances of values across the ``SparseNdarray``, possibly over a given axis or set of axes. Args: axis: A single integer specifying the axis over which to calculate the variance. Alternatively, a tuple (multiple axes) or None (no axes), see :py:func:`~numpy.var` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``SparseNdarray``, see :py:func:`~numpy.var` for details. ddof: Delta in the degrees of freedom to subtract from the denominator. Typically set to 1 to obtain the sample variance. Returns: A NumPy array containing the variances. If ``axis = None``, this will be a NumPy scalar instead. """ return array_var( self, axis=axis, dtype=dtype, ddof=ddof, reduce_over_x=_reduce_SparseNdarray, masked=self._is_masked, )
[docs] def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None) -> numpy.ndarray: """Test whether any array element along a given axis evaluates to True. Compute this test across the ``SparseNdarray``, possibly over a given axis or set of axes. If the seed has a ``any()`` method, that method is called directly with the supplied arguments. Args: axis: A single integer specifying the axis over which to test for any. Alternatively, a tuple (multiple axes) or None (no axes), see :py:func:`~numpy.any` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``SparseNdarray``, see :py:func:`~numpy.any` for details. Returns: A NumPy array containing the variances. If ``axis = None``, this will be a NumPy scalar instead. """ return array_any( self, axis=axis, dtype=dtype, reduce_over_x=_reduce_SparseNdarray, masked=self._is_masked, )
[docs] def all(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None) -> numpy.ndarray: """Test whether all array elements along a given axis evaluate to True. Compute this test across the ``SparseNdarray``, possibly over a given axis or set of axes. If the seed has a ``all()`` method, that method is called directly with the supplied arguments. Args: axis: A single integer specifying the axis over which to test for any. Alternatively, a tuple (multiple axes) or None (no axes), see :py:func:`~numpy.any` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``SparseNdarray``, see :py:func:`~numpy.any` for details. Returns: A NumPy array containing the variances. If ``axis = None``, this will be a NumPy scalar instead. """ return array_all( self, axis=axis, dtype=dtype, reduce_over_x=_reduce_SparseNdarray, masked=self._is_masked, )
# Other stuff
[docs] def __copy__(self) -> "SparseNdarray": """ Returns: A deep copy of this object. """ return SparseNdarray( shape=self._shape, contents=copy.deepcopy(self._contents), index_dtype=self._index_dtype, dtype=self._dtype, is_masked=self._is_masked, check=False )
[docs] def copy(self) -> "SparseNdarray": """ Returns: A deep copy of this object. """ return self.__copy__()
######################################################### ######################################################### def _peek(contents: list, dim: int): if dim == 1: for x in contents: if x is not None: return x[0].dtype, x[1].dtype, numpy.ma.isMaskedArray(x[1]) else: for x in contents: if x is not None: out = _peek(x, dim - 1) if out is not None: return out return None _CheckPayload = namedtuple("_CheckPayload", [ "max_index", "dtype", "index_dtype", "is_masked" ]) def _check_sparse_tuple(indices: numpy.ndarray, values: numpy.ndarray, payload: _CheckPayload): if len(indices) != len(values): raise ValueError("length of index and value vectors should be the same") if values.dtype != payload.dtype: raise ValueError("inconsistent data types for different value vectors") if indices.dtype != payload.index_dtype: raise ValueError("inconsistent data types for different index vectors") for i, ix in enumerate(indices): if ix < 0 or ix >= payload.max_index: raise ValueError("index vectors out of range for the last dimension") for i in range(1, len(indices)): if indices[i] <= indices[i - 1]: raise ValueError("index vectors should be sorted in strictly increasing order") if payload.is_masked != numpy.ma.isMaskedArray(values): raise ValueError("inconsistent masking status for different value vectors") def _recursive_check(contents: list, shape: Tuple[int, ...], payload: _CheckPayload, dim: int): if len(contents) != shape[dim]: raise ValueError("length of 'contents' or its components should match the extent of the corresponding dimension") if dim == 1: for x in contents: if x is not None: _check_sparse_tuple(x[0], x[1], payload) else: for x in contents: if x is not None: _recursive_check(x, shape, payload, dim=dim-1) ######################################################### ######################################################### _SubsetSummary = namedtuple("_SubsetSummary", [ "subset", "increasing", "consecutive", "search_first", "search_last", "first_index", "past_last_index", "random_map" ]) def _characterize_indices(subset: Sequence[int], dim_extent: int): if len(subset) == 0: return _SubsetSummary( subset = subset, increasing = False, consecutive = False, first_index = None, past_last_index = None, search_first = None, search_last = None, random_map = None, ) for i in range(1, len(subset)): if subset[i] <= subset[i-1]: random_map = {} for i, x in enumerate(subset): if x in random_map: if isinstance(random_map[x], list): random_map[x].append(i) else: random_map[x] = [random_map[x], i] else: random_map[x] = i first = min(subset) return _SubsetSummary( subset = subset, increasing = False, consecutive = False, first_index = first, past_last_index = None, search_first = (first > 0), search_last = None, random_map = random_map ) consecutive = True for i in range(1, len(subset)): if subset[i] != subset[i - 1] + 1: consecutive = False break first = subset[0] last = subset[-1] + 1 return _SubsetSummary( subset = subset, increasing = True, consecutive = consecutive, first_index = first, past_last_index = last, search_first = (first > 0), search_last = (last < dim_extent), random_map = None, ) def _extract_sparse_vector_internal(indices: numpy.ndarray, values: numpy.ndarray, subset_summary: _SubsetSummary, f: Callable): subset = subset_summary.subset start_pos = 0 if subset_summary.search_first: start_pos = bisect_left(indices, subset_summary.first_index) end_pos = len(indices) if subset_summary.increasing: pos = 0 x = start_pos for i in subset: while x < end_pos and i > indices[x]: x += 1 if x == end_pos: break if i == indices[x]: f(pos, i, values[x]) pos += 1 else: for x in range(start_pos, end_pos): ix = indices[x] if ix in subset_summary.random_map: targets = subset_summary.random_map[ix] if isinstance(targets, int): f(targets, ix, values[x]) else: for t in targets: f(t, ix, values[x]) def _extract_sparse_vector_to_dense(indices: numpy.ndarray, values: numpy.ndarray, subset_summary: _SubsetSummary, output: numpy.ndarray): if len(subset_summary.subset) == 0: pass elif subset_summary.consecutive: start_pos = 0 first = indices.dtype.type(subset_summary.first_index) # avoid casting of uint64s to floats during subtraction. if subset_summary.search_first: start_pos = bisect_left(indices, first) end_pos = len(indices) if subset_summary.search_last: end_pos = bisect_left(indices, subset_summary.past_last_index, lo=start_pos, hi=end_pos) for x in range(start_pos, end_pos): output[indices[x] - first] = values[x] else: def f(p, i, v): output[p] = v _extract_sparse_vector_internal(indices, values, subset_summary, f) def _recursive_extract_dense_array(contents: numpy.ndarray, subset: Tuple[Sequence[int], ...], subset_summary: _SubsetSummary, output: numpy.ndarray, dim: int): curdex = subset[dim] if dim == 1: pos = 0 for i in curdex: x = contents[i] if x is not None: _extract_sparse_vector_to_dense(x[0], x[1], subset_summary=subset_summary, output=output[pos]) pos += 1 else: pos = 0 for i in curdex: x = contents[i] if x is not None: _recursive_extract_dense_array( contents=x, subset=subset, subset_summary=subset_summary, output=output[pos], dim=dim - 1, ) pos += 1 def _extract_dense_array_from_SparseNdarray(x: SparseNdarray, subset: Tuple[Sequence[int], ...], dtype: Optional[numpy.dtype] = None) -> numpy.ndarray: idims = [len(y) for y in subset] subset_summary = _characterize_indices(subset[0], x._shape[0]) # We reverse the dimensions so that we use F-contiguous storage. This also # makes it slightly easier to do the recursion as we can just index by # the first dimension to obtain a subarray at each recursive step. if dtype is None: dtype = x._dtype output = numpy.zeros((*reversed(idims),), dtype=dtype) if x._is_masked: output = numpy.ma.MaskedArray(output, mask=False) if x._contents is not None: ndim = len(x._shape) if ndim > 1: _recursive_extract_dense_array(x._contents, subset, subset_summary=subset_summary, output=output, dim=ndim-1) else: _extract_sparse_vector_to_dense(x._contents[0], x._contents[1], subset_summary=subset_summary, output=output) return output.T def _extract_sparse_vector_to_sparse(indices: numpy.ndarray, values: numpy.ndarray, subset_summary: _SubsetSummary): if len(subset_summary.subset) == 0: pass elif subset_summary.consecutive: start_pos = 0 first = indices.dtype.type(subset_summary.first_index) # avoid casting of uint64's to floats during subtraction. if subset_summary.search_first: start_pos = bisect_left(indices, first) end_pos = len(indices) if subset_summary.search_last: end_pos = bisect_left(indices, subset_summary.past_last_index, lo=start_pos, hi=end_pos) if start_pos == 0 and end_pos == len(indices): new_indices = indices new_values = values else: new_indices = indices[start_pos:end_pos] new_values = values[start_pos:end_pos] if first: new_indices = new_indices - first return new_indices, new_values elif subset_summary.increasing: new_indices = [] new_values = [] def f(p, i, v): new_indices.append(p) new_values.append(v) _extract_sparse_vector_internal(indices, values, subset_summary, f) if len(new_indices) == 0: return None return ( _convert_to_unmasked_1darray(new_indices, dtype=indices.dtype), _convert_to_maybe_masked_1darray(new_values, dtype=values.dtype, masked=numpy.ma.isMaskedArray(values)) ) else: new_pairs = [] def f(p, i, v): new_pairs.append((p, v)) _extract_sparse_vector_internal(indices, values, subset_summary, f) new_pairs.sort() shape = (len(new_pairs),) new_indices = _allocate_unmasked_ndarray(shape, dtype=indices.dtype) new_values = _allocate_maybe_masked_ndarray(shape, dtype=values.dtype, masked=numpy.ma.isMaskedArray(values)) for i, pair in enumerate(new_pairs): new_indices[i] = pair[0] new_values[i] = pair[1] return new_indices, new_values def _recursive_extract_sparse_array(contents: list, shape: Tuple[int, ...], subset: Tuple[Sequence[int], ...], subset_summary: _SubsetSummary, dim: int): curdex = subset[dim] new_contents = [] if dim == 1: for i in curdex: x = contents[i] if x is not None: y = _extract_sparse_vector_to_sparse(x[0], x[1], subset_summary) new_contents.append(y) else: new_contents.append(None) else: for i in curdex: if contents[i] is not None: y = _recursive_extract_sparse_array( contents[i], shape, subset=subset, subset_summary=subset_summary, dim=dim - 1, ) new_contents.append(y) else: new_contents.append(None) for x in new_contents: if x is not None: return new_contents return None def _extract_sparse_array_from_SparseNdarray(x: SparseNdarray, subset: Tuple[Sequence[int], ...]) -> SparseNdarray: idims = [len(y) for y in subset] subset_summary = _characterize_indices(subset[0], x._shape[0]) new_contents = None if x._contents is not None: ndim = len(x.shape) if ndim > 1: new_contents = _recursive_extract_sparse_array(x._contents, x._shape, subset=subset, subset_summary=subset_summary, dim=ndim-1) else: new_contents = _extract_sparse_vector_to_sparse(x._contents[0], x._contents[1], subset_summary) return SparseNdarray(shape=(*idims,), contents=new_contents, index_dtype=x.index_dtype, dtype=x.dtype, is_masked=x.is_masked, check=False) ######################################################### ######################################################### _TransformPayload = namedtuple("_TransformPayload", [ "fun", "output_dtype" ]) def _transform_sparse_vector(location: Sequence[int], indices: numpy.ndarray, values: numpy.ndarray, payload: _TransformPayload): idx, val = payload.fun(location, indices, values) return (idx.astype(indices.dtype, copy=False), val.astype(payload.output_dtype, copy=False)) # a bit of safety with respect to types. def _recursive_transform_sparse_array(contents: list, shape: Tuple[int, ...], payload: _TransformPayload, dim: int, location: Sequence[int] = []): new_contents = [] location.append(0) if dim == 1: for i in range(shape[dim]): location[-1] = i x = contents[i] if x is not None: new_contents.append(_transform_sparse_vector(location, x[0], x[1], payload)) else: new_contents.append(None) else: for i in range(shape[dim]): if contents[i] is not None: location[-1] = i y = _recursive_transform_sparse_array( contents=contents[i], shape=shape, payload=payload, dim=dim-1, location=location, ) new_contents.append(y) else: new_contents.append(None) location.pop() for x in new_contents: if x is not None: return new_contents return None def _transform_sparse_array_from_SparseNdarray(x: SparseNdarray, f: Callable, output_dtype: numpy.dtype) -> SparseNdarray: new_contents = None if x._contents is not None: payload = _TransformPayload(fun=f, output_dtype=output_dtype) ndim = len(x._shape) if ndim > 1: new_contents = _recursive_transform_sparse_array(contents=x._contents, shape=x._shape, payload=payload, dim=ndim-1) else: new_contents = _transform_sparse_vector((), indices=x._contents[0], values=x._contents[1], payload=payload) return SparseNdarray(shape=x._shape, contents=new_contents, index_dtype=x.index_dtype, dtype=output_dtype, is_masked=x.is_masked, check=False) ######################################################### ######################################################### _BinaryOpPayload = namedtuple("_BinaryOpPayload", [ "fun", "dtype1", "dtype2", "output_dtype", "output_index_dtype" ]) def _binary_operate_sparse_vector(vector1: Tuple[numpy.ndarray, numpy.ndarray], vector2: Tuple[numpy.ndarray, numpy.ndarray], payload: _BinaryOpPayload): if vector1 is None and vector2 is None: return None elif vector1 is not None and vector2 is None: indices1, values1 = vector1 mock = numpy.zeros((1,), dtype=payload.dtype2) # get vector of length 1 for correct type coercion. return indices1.astype(payload.output_index_dtype, copy=False), payload.fun(values1, mock).astype(payload.output_dtype, copy=False) elif vector1 is None and vector2 is not None: indices2, values2 = vector2 mock = numpy.zeros((1,), dtype=payload.dtype1) return indices2.astype(payload.output_index_dtype, copy=False), payload.fun(mock, values2).astype(payload.output_dtype, copy=False) else: indices1, values1 = vector1 indices2, values2 = vector2 f = payload.fun i1 = 0 len1 = len(indices1) z1 = values1.dtype.type(0) i2 = 0 len2 = len(indices2) z2 = values2.dtype.type(0) outval = [] outidx = [] while i1 < len1 and i2 < len2: ix1 = indices1[i1] ix2 = indices2[i2] if ix1 > ix2: outval.append(f(z1, values2[i2])) outidx.append(ix2) i2 += 1 elif ix1 < ix2: outval.append(f(values1[i1], z2)) outidx.append(ix1) i1 += 1 else: outval.append(f(values1[i1], values2[i2])) outidx.append(ix1) i1 += 1 i2 += 1 # Only one of the following should be called. while i2 < len2: outval.append(f(z1, values2[i2])) outidx.append(indices2[i2]) i2 += 1 while i1 < len1: outval.append(f(values1[i1], z2)) outidx.append(indices1[i1]) i1 += 1 return ( _convert_to_unmasked_1darray(outidx, dtype=payload.output_index_dtype), _convert_to_maybe_masked_1darray(outval, dtype=payload.output_dtype, masked=numpy.ma.isMaskedArray(values1) or numpy.ma.isMaskedArray(values2)) ) def _recursive_binary_operation_on_SparseNdarray(contents1: list, contents2: list, payload: _BinaryOpPayload, dim: int): if contents1 is None and contents2 is None: return None new_contents = [] if contents1 is not None and contents2 is None: if dim == 1: for con1 in contents1: new_contents.append(_binary_operate_sparse_vector(con1, None, payload)) else: for con1 in contents1: new_contents.append(_recursive_binary_operation_on_SparseNdarray(con1, None, payload, dim=dim - 1)) elif contents1 is None and contents2 is not None: if dim == 1: for con2 in contents2: new_contents.append(_binary_operate_sparse_vector(None, con2, payload)) else: for con2 in contents2: new_contents.append(_recursive_binary_operation_on_SparseNdarray(None, con2, payload, dim=dim - 1)) else: if dim == 1: for i, con1 in enumerate(contents1): new_contents.append(_binary_operate_sparse_vector(con1, contents2[i], payload)) else: for i, con1 in enumerate(contents1): new_contents.append(_recursive_binary_operation_on_SparseNdarray(con1, contents2[i], payload, dim=dim - 1)) for x in new_contents: if x is not None: return new_contents return None def _binary_operation_on_SparseNdarray(x: SparseNdarray, y: SparseNdarray, operation: ISOMETRIC_OP_WITH_ARGS): op = _choose_operator(operation) dummy1 = numpy.zeros(1, dtype=x._dtype) dummy2 = numpy.zeros(1, dtype=y._dtype) dummy = op(dummy1, dummy2) if dummy[0] != 0: return op(x.__array__(), y.__array__()) if x._contents is None and y._contents is None: new_contents = None else: payload = _BinaryOpPayload(fun=op, dtype1=x._dtype, dtype2=y._dtype, output_index_dtype=x.index_dtype, output_dtype=dummy.dtype) ndim = len(x._shape) if ndim > 1: new_contents = _recursive_binary_operation_on_SparseNdarray(x._contents, y._contents, payload=payload, dim = ndim - 1) else: new_contents = _binary_operate_sparse_vector(x._contents, y._contents, payload=payload) return SparseNdarray(shape=x._shape, contents=new_contents, index_dtype=x.index_dtype, dtype=dummy.dtype, is_masked=x.is_masked, check=False) ######################################################### ######################################################### def _operate_with_args_on_SparseNdarray(x: SparseNdarray, other, operation: ISOMETRIC_OP_WITH_ARGS, right: bool) -> SparseNdarray: if isinstance(other, SparseNdarray): return _binary_operation_on_SparseNdarray(x, other, operation) along = _infer_along_with_args(x._shape, other) num_other = 1 op = _choose_operator(operation) dummy = numpy.zeros(num_other, dtype=x._dtype) if right: dummy = op(dummy, other) else: dummy = op(other, dummy) if num_other and not (dummy == 0).all(): # densifying. if right: return op(x.__array__(), other) else: return op(other, x.__array__()) if isinstance(other, numpy.ndarray): num_other = numpy.prod(other.shape) other = other.reshape((num_other,)) # flattening if along is None: if right: def f2(location, indices, values): return indices, op(values, other) else: def f2(location, indices, values): return indices, op(other, values) elif along > 0: if right: def f2(location, indices, values): p = location[-along] # remember, location is (i) reversed and (ii) missing the final dimension, so '-along' works. return indices, op(values, other[p:p+1]) # get vector of length 1 for correct type coercion. else: def f2(location, indices, values): p = location[-along] return indices, op(other[p:p+1], values) else: if right: def f2(location, indices, values): return indices, op(values, other[indices]) else: def f2(location, indices, values): return indices, op(other[indices], values) return _transform_sparse_array_from_SparseNdarray(x, f2, dummy.dtype) ######################################################### ######################################################### _TransposeFillPayload = namedtuple("_TransposeFillPayload", [ "perm", "new_shape", "new_contents" ]) def _transpose_SparseNdarray_contents_internal(location: Sequence[int], indices: numpy.ndarray, values: numpy.ndarray, payload: _TransposeFillPayload): destination = [] final = None for i, p in enumerate(payload.perm): if p == 0: final = i destination.append(None) else: destination.append(location[-p]) # remember, location is (i) reversed and (ii) missing the final dimension, so '-p' works. ndim = len(payload.new_shape) for i, ix in enumerate(indices): destination[final] = ix target = payload.new_contents for j in range(ndim - 1, 1, -1): # walking backwards from the later dimensions to fill 'new_contents' correctly. d = destination[j] if target[d] is None: replacement = [None] * payload.new_shape[j - 1] target[d] = replacement target = target[d] d = destination[1] if target[d] is None: target[d] = ([], []) outi, outv = target[d] outi.append(destination[0]) outv.append(values[i]) def _recursive_transpose_SparseNdarray_fill(contents: list, payload: _TransposeFillPayload, dim: int, location: Sequence[int] = []): location.append(0) if dim == 1: for i, con in enumerate(contents): if con is not None: location[-1] = i _transpose_SparseNdarray_contents_internal(location, con[0], con[1], payload) else: for i, con in enumerate(contents): if con is not None: location[-1] = i _recursive_transpose_SparseNdarray_fill(con, payload, location=location, dim=dim - 1) location.pop() _TransposeReallocPayload = namedtuple("_TransposeReallocatePayload", [ "output_dtype", "output_index_dtype", "output_is_masked" ]) def _recursive_transpose_SparseNdarray_reallocate(contents: list, payload: _TransposeReallocPayload, dim: int): if dim == 1: for i, con in enumerate(contents): if con is not None: contents[i] = ( _convert_to_unmasked_1darray(con[0], dtype=payload.output_index_dtype), _convert_to_maybe_masked_1darray(con[1], dtype=payload.output_dtype, masked=payload.output_is_masked) ) else: for i, con in enumerate(contents): if con is not None: _recursive_transpose_SparseNdarray_reallocate(con, payload, dim=dim - 1) def _transpose_SparseNdarray(x: SparseNdarray, perm): ndim = len(x._shape) if ndim == 1: return x new_shape = [] for p in perm: new_shape.append(x._shape[p]) new_contents = None if x._contents is not None: new_contents = [None] * new_shape[-1] _recursive_transpose_SparseNdarray_fill( x._contents, _TransposeFillPayload(perm=perm, new_shape=new_shape, new_contents=new_contents), dim=ndim - 1, ) _recursive_transpose_SparseNdarray_reallocate( new_contents, _TransposeReallocPayload(output_dtype=x._dtype, output_index_dtype=x._index_dtype, output_is_masked=x._is_masked), dim=ndim - 1, ) return SparseNdarray(shape=(*new_shape,), contents=new_contents, index_dtype=x._index_dtype, dtype=x._dtype, is_masked=x.is_masked, check=False) ######################################################### ######################################################### _ConcatenatePayload = namedtuple("_ConcatenatePayload", [ "shapes", "offset", "output_dtype", "output_index_dtype", "output_is_masked"]) def _concatenate_sparse_vectors(idx: List[numpy.ndarray], val: List[numpy.ndarray], payload: _ConcatenatePayload): newidx = _concatenate_unmasked_ndarrays(idx, axis=0).astype(payload.output_index_dtype, copy=False) newval = _concatenate_maybe_masked_ndarrays(val, axis=0, masked=payload.output_is_masked).astype(payload.output_dtype, copy=False) return (newidx, newval) def _coerce_concatenated_SparseNdarray_types(contents: list, payload: _ConcatenatePayload, dim: int): if dim == 1: for i, con in enumerate(contents): if con is not None: idx2 = con[0].astype(payload.output_index_dtype, copy=False) val2 = con[1].astype(payload.output_dtype, copy=False) contents[i] = (idx2, val2) else: for i, con in enumerate(contents): if con is not None: _coerce_concatenated_SparseNdarray_types(con, payload, dim=dim - 1) def _recursive_concatenate_SparseNdarrays(contents: list, final_shape: Tuple[int, ...], along: int, payload: _ConcatenatePayload, dim: int): if along == dim: all_none = True for x in contents: if x is not None: all_none = False new_contents = None if not all_none: new_contents = [] for i, x in enumerate(contents): if x is not None: new_contents += x else: new_contents += [None] * payload.shapes[i][along] _coerce_concatenated_SparseNdarray_types(new_contents, payload=payload, dim=dim) return new_contents elif dim == 1: new_contents = [] for i in range(final_shape[dim]): outidx = [] outval = [] for j, c in enumerate(contents): if c is not None and c[i] is not None: outidx.append(c[i][0] + payload.offset[j]) outval.append(c[i][1]) if len(outval): new_contents.append(_concatenate_sparse_vectors(outidx, outval, payload)) else: new_contents.append(None) return new_contents else: new_contents = [] collected = [None] * len(contents) for i in range(final_shape[dim]): for j, c in enumerate(contents): if c is not None: collected[j] = c[i] new_contents.append(_recursive_concatenate_SparseNdarrays(collected, final_shape, along, payload, dim=dim-1)) return new_contents def _concatenate_SparseNdarrays(xs: List[SparseNdarray], along: int): all_contents = [] all_shapes = [] for x in xs: all_contents.append(x._contents) all_shapes.append(x._shape) combined = 0 ref_shape = all_shapes[0] for shape in all_shapes: if len(shape) != len(ref_shape): raise ValueError("inconsistent dimensionalities for combining SparseNdarrays") for d, s in enumerate(shape): if d == along: combined += s elif s != ref_shape[d]: raise ValueError("inconsistent shapes for combining SparseNdarrays along axis " + str(along)) new_shape = list(ref_shape) new_shape[along] = combined dummy_collected = [] dummy_collected_index = [] for x in xs: dummy_collected.append(numpy.zeros(1, dtype=x._dtype)) dummy_collected_index.append(numpy.zeros(1, dtype=x._index_dtype)) output_dtype = _concatenate_unmasked_ndarrays(dummy_collected, axis=0).dtype output_index_dtype = _concatenate_unmasked_ndarrays(dummy_collected_index, axis=0).dtype output_is_masked = any(y._is_masked for y in xs) all_none = True for con in all_contents: if con is not None: all_none = False new_contents = None if not all_none: offset = None ndim = len(new_shape) # Upgrading the index type to ensure we can hold the output, if we're # concatenating on the first dimension (and thus possibly increasing # the indices in the leaves of the subsequent SparseNdarrays). if along == 0: last = 0 offset = [] for i, shape in enumerate(all_shapes): offset.append(last) last += shape[along] for candidate in [output_index_dtype, numpy.uint32, numpy.uint64]: if last < numpy.iinfo(candidate).max: output_index_dtype = candidate break payload = _ConcatenatePayload( shapes=all_shapes, offset=offset, output_dtype=output_dtype, output_index_dtype=output_index_dtype, output_is_masked=output_is_masked ) if ndim > 1: new_contents = _recursive_concatenate_SparseNdarrays(all_contents, new_shape, along=along, payload=payload, dim=ndim-1) else: outidx = [] outval = [] for j, c in enumerate(all_contents): if c is not None: outidx.append(c[0] + offset[j]) outval.append(c[1]) new_contents = _concatenate_sparse_vectors(outidx, outval, payload) return SparseNdarray(shape=(*new_shape,), contents=new_contents, dtype=output_dtype, index_dtype=output_index_dtype, is_masked=output_is_masked, check=False) ######################################################### ######################################################### _ReductionPayload = namedtuple("_ReductionPayload", [ "multipliers", "operation" ]) def _reduce_sparse_vector(idx: numpy.ndarray, val: numpy.ndarray, payload: _ReductionPayload, offset: int = 0): for j, ix in enumerate(idx): extra = payload.multipliers[0] * ix payload.operation(offset + extra, val[j]) return def _recursive_reduce_SparseNdarray(contents, payload: _ReductionPayload, dim: int, offset: int = 0): mult = payload.multipliers[dim] if dim == 1: for i, con in enumerate(contents): if con is not None: _reduce_sparse_vector(con[0], con[1], payload, offset = offset + mult * i) else: for i, con in enumerate(contents): if con is not None: _recursive_reduce_SparseNdarray(con, payload, dim = dim - 1, offset = offset + mult * i) return def _reduce_SparseNdarray(x: SparseNdarray, axes: List[int], operation: Callable): if x.contents is not None: multipliers = _create_offset_multipliers(x.shape, axes) payload = _ReductionPayload(operation=operation, multipliers=multipliers) ndim = len(x.shape) if ndim == 1: _reduce_sparse_vector(x.contents[0], x.contents[1], payload) else: _recursive_reduce_SparseNdarray(x.contents, payload, dim=ndim - 1) return