class Subset(DelayedOp):
    """Delayed subsetting of a seed along one or more of its dimensions.

    Modelled on Bioconductor's ``DelayedArray::DelayedSubset`` class. The
    result is equivalent to taking the outer product of the per-dimension
    subset indices.

    This class is a building block for developers constructing new
    :py:class:`~delayedarray.DelayedArray.DelayedArray` instances; end users
    are generally not expected to work with ``Subset`` objects directly.
    """

    def __init__(self, seed, subset: Tuple[Sequence[int], ...]):
        """
        Args:
            seed:
                Any object that satisfies the seed contract, see
                :py:class:`~delayedarray.DelayedArray.DelayedArray` for
                details.

            subset:
                Tuple with one entry per dimension of ``seed``. Each entry
                should be a vector of integer indices naming the elements of
                that dimension to keep, where every index is non-negative
                and less than the extent of the dimension. Indices may be
                unsorted and may contain duplicates.

        Raises:
            ValueError: If ``subset`` does not have exactly one entry for
                each dimension of ``seed``.
        """
        self._seed = seed
        if len(subset) != len(seed.shape):
            raise ValueError("Dimensionality of 'seed' and 'subset' should be the same.")

        self._subset = subset
        # The extent of each output dimension is simply the number of indices
        # requested for it.
        self._shape = tuple(len(indices) for indices in subset)

    @property
    def shape(self) -> Tuple[int, ...]:
        """
        Returns:
            Tuple of integers giving the extent of every dimension of the
            subsetted object.
        """
        return self._shape

    @property
    def dtype(self) -> dtype:
        """
        Returns:
            NumPy type of the subsetted contents; this is taken directly
            from ``seed``.
        """
        return self._seed.dtype

    @property
    def seed(self):
        """
        Returns:
            The seed object.
        """
        return self._seed

    @property
    def subset(self) -> Tuple[Sequence[int], ...]:
        """
        Returns:
            The subset sequences that are applied to each dimension of the
            seed.
        """
        return self._subset
def _simplify_subset(x: Subset) -> Any:
    """Collapse a ``Subset`` that directly wraps another ``Subset``.

    Args:
        x: The outer ``Subset`` to simplify.

    Returns:
        ``x`` itself when its seed is not exactly a ``Subset``; the innermost
        seed when every composed per-dimension subset is reported as a no-op;
        otherwise a shallow copy of ``x`` that points at the innermost seed
        and carries the composed subsets.
    """
    inner = x.seed

    # Deliberately an exact type check instead of isinstance(): subclasses of
    # Subset may add behaviour of their own and must not be collapsed.
    if type(inner) is not Subset:
        return x

    composed = []
    everything_kept = True
    for axis, outer_indices in enumerate(x.subset):
        # Apply the outer indices on top of the inner ones for this dimension.
        merged = biocutils.subset_sequence(inner.subset[axis], outer_indices)
        # Once one dimension is known to be a real subset, skip further checks.
        if everything_kept and not _is_single_subset_noop(inner.seed.shape[axis], merged):
            everything_kept = False
        composed.append(merged)

    if everything_kept:
        return inner.seed

    # Shallow copy so that 'x' is left untouched; the shape is unchanged by
    # the composition, so only the seed and subsets need replacing.
    collapsed = copy.copy(x)
    collapsed._seed = inner.seed
    collapsed._subset = tuple(composed)
    return collapsed


def _extract_array(x: Subset, subset: Tuple[Sequence[int], ...], f: Callable):
    """Extract part of a ``Subset`` by delegating to its seed.

    Args:
        x: The ``Subset`` to extract from.
        subset: Requested indices for each dimension, expressed relative to
            ``x`` (not to its seed).
        f: Callable invoked as ``f(seed, subset_tuple)`` to perform the
            actual extraction from the seed.

    Returns:
        Whatever ``f`` returns, re-indexed with ``ix_`` when at least one
        dimension came back from ``_sanitize_subset`` with a remapping.
    """
    sanitized = []
    remaps = []
    num_safe = 0

    for axis, requested in enumerate(subset):
        # Translate the request into indices on the underlying seed.
        on_seed = biocutils.subset_sequence(x._subset[axis], requested)
        clean, remap = _sanitize_subset(on_seed)
        sanitized.append(clean)
        if remap is None:
            # No remapping needed for this dimension; use the identity mapping
            # in case another dimension forces the ix_ step below.
            num_safe += 1
            remap = range(len(clean))
        remaps.append(remap)

    extracted = f(x._seed, tuple(sanitized))
    if num_safe == len(subset):
        return extracted

    # At least one dimension was sanitized, so restore the requested layout.
    return extracted[ix_(*remaps)]
@create_dask_array.register
def create_dask_array_Subset(x: Subset):
    """See :py:meth:`~delayedarray.create_dask_array.create_dask_array`."""
    result = create_dask_array(x._seed)

    # dask does not support ix_ yet, so the subsets are applied one dimension
    # at a time, leaving every other dimension as a full slice.
    rank = len(result.shape)
    for axis in range(rank):
        picked = x._subset[axis]
        if isinstance(picked, range):
            picked = list(picked)

        leading = (slice(None),) * axis
        trailing = (slice(None),) * (rank - axis - 1)
        result = result[(..., *leading, picked, *trailing)]

    return result