Source code for biocutils.order

from typing import Any, Union, Sequence, Optional
from functools import singledispatch

import numpy

from .subset import subset
from .Factor import Factor


[docs] @singledispatch def order( x: Any, force_last: Union[set, Sequence] = [None, numpy.ma.masked, numpy.nan], decreasing: bool = False, dtype: Optional[numpy.dtype] = None, ) -> numpy.ndarray: """ Obtain an ordering of entries of ``x``. This ordering should be stable. Args: x: Values to be ordered. All values should be comparable aside from those listed in ``force_last``. force_last: Values that are incomparable and will be placed last, i.e., at the end of the ordering. No attempt is made to order values within ``force_last``. decreasing: Whether to order by decreasing value. Default is to order by increasing value. dtype: Integer type of the output array. If ``None``, defaults to the smallest type that can hold the length of ``x``. Returns: Integer NumPy array containing the ordering required to permute ``x`` into a sorted state, i.e., given an output array ``o``, ``subset(x, o)`` will be sorted. Examples: >>> import biocutils >>> >>> x = [ ... 15, ... 1, ... 22, ... 3, ... 14, ... ] >>> o = biocutils.order( ... x ... ) >>> print(o) >>> biocutils.subset( ... x, o ... ) >>> o = biocutils.order( ... x, ... decreasing=True, ... ) >>> print(o) >>> biocutils.subset( ... x, o ... ) >>> >>> x = [ ... "C", ... "B", ... None, ... "D", ... "D", ... None, ... "A", ... ] >>> o = biocutils.order( ... x ... ) >>> print(o) >>> biocutils.subset( ... x, o ... ) >>> o = biocutils.order( ... x, ... force_last=set( ... [None, "A"] ... ), ... ) >>> print(o) >>> biocutils.subset( ... x, o ... ) >>> >>> # Factor ordering respects the ordering in the levels. >>> x = biocutils.Factor.from_sequence( ... [ ... "C", ... "B", ... "D", ... "A", ... "C", ... "A", ... "D", ... ], ... [ ... "D", ... "C", ... "B", ... "A", ... ], ... ) >>> o = biocutils.order( ... x ... ) >>> print(o) >>> print( ... biocutils.subset( ... x, o ... ) ... ) """ collected = [] forced = [] if len(force_last) > 0: for i, y in enumerate(x): if y not in force_last: collected.append(i) else: forced.append(i) else: collected = list(range(len(x))) def key(i): return x[i] collected.sort(key=key, reverse=decreasing) if dtype is None: dtype = numpy.min_scalar_type(len(x) - 1) output = numpy.ndarray(len(x), dtype=dtype) output[: len(collected)] = collected if len(forced) > 0: output[len(collected) :] = forced return output
@order.register def _order_Factor( x: Factor, force_last: Union[set, Sequence] = set([None]), decreasing: bool = False, dtype: Optional[numpy.dtype] = None, ) -> numpy.ndarray: new_force_last = set() for i, lev in enumerate(x.get_levels()): if lev in force_last: new_force_last.add(i) if None in force_last: new_force_last.add(-1) # For consistency with R, we order by codes. return order.registry[object](x.get_codes(), force_last=new_force_last, decreasing=decreasing, dtype=dtype)
[docs] @singledispatch def sort(x: Any, force_last: Union[set, Sequence] = [None, numpy.ma.masked], decreasing: bool = False) -> Any: """ Sort an arbitrary iterable sequence. Args: x: Values to be sorted. All values should be comparable aside from those listed in ``force_last``. force_last: Values that are incomparable and will be placed last, i.e., at the end of the ordering. No attempt is made to order values within ``force_last``. decreasing: Whether to sort by decreasing value. Default is to sort by increasing value. Returns: Sorted contents of ``x``. This is usually of the same class as ``x``. Examples: >>> import biocutils >>> biocutils.sort( ... range( ... 20, 10, -1 ... ) ... ) >>> biocutils.sort( ... [ ... "A", ... "B", ... None, ... "C", ... "D", ... ], ... decreasing=True, ... ) >>> import numpy >>> biocutils.sort( ... numpy.random.rand( ... 10 ... ) ... ) """ return subset(x, order(x, force_last=force_last, decreasing=decreasing))