Source code for compressed_lists.partition

from typing import List, Optional, Sequence, Union
from warnings import warn

import biocutils as ut
import numpy as np

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"


def _validate_names(names, expected_len):
    """Check that ``names``, when provided, has exactly ``expected_len`` entries.

    Args:
        names:
            Names to check, or ``None`` to skip the check entirely.

        expected_len:
            Required number of names.

    Raises:
        ValueError: If ``names`` is not ``None`` and its length differs
            from ``expected_len``.
    """
    if names is not None and len(names) != expected_len:
        raise ValueError("Length of names must match length of ends.")


class Partitioning:
    """Represents partitioning information for a `CompressedList`.

    This is similar to the ``PartitioningByEnd`` class in Bioconductor.
    It keeps track of where each element begins and ends in the unlisted data.
    """

    def __init__(self, ends: Sequence[int], names: Optional[Sequence[str]] = None, validate: bool = True):
        """Initialize a Partitioning object.

        Args:
            ends:
                Sequence of ending positions for each partition.

            names:
                Optional names for each partition.

            validate:
                Internal use only.

        Raises:
            ValueError: If ``validate`` is true and the number of names
                does not match the number of ends.
        """
        self._ends = np.array(ends, dtype=np.int64)

        # Each partition starts where the previous one ended; the first starts at 0.
        self._starts = np.zeros_like(self._ends)
        if len(self._ends) > 0:
            self._starts[1:] = self._ends[:-1]

        self._names = None
        if names is not None:
            # Store names as ``ut.Names`` so that the constructor agrees with
            # ``set_names`` and with the ``names`` property.
            self._names = names if isinstance(names, ut.Names) else ut.Names(names)

            if validate:
                _validate_names(names, len(self._ends))

    @classmethod
    def from_lengths(cls, lengths: Sequence[int], names: Optional[Sequence[str]] = None) -> "Partitioning":
        """Create a Partitioning from a sequence of lengths.

        Args:
            lengths:
                Sequence of partition lengths.

            names:
                Optional names for each partition.

        Returns:
            A new Partitioning object.
        """
        ends = np.cumsum(lengths)
        return cls(ends, names)

    @classmethod
    def from_list(cls, lst: List, names: Optional[Sequence[str]] = None) -> "Partitioning":
        """Create a Partitioning from a list by using the lengths of each element.

        Elements without a ``__len__`` are treated as having length 1.

        Args:
            lst:
                A list to create partitioning from.

            names:
                Optional names for each partition.

        Returns:
            A new Partitioning object.
        """
        lengths = [len(item) if hasattr(item, "__len__") else 1 for item in lst]
        return cls.from_lengths(lengths, names)

    def _define_output(self, in_place: bool = False) -> "Partitioning":
        """Return ``self`` when ``in_place`` is ``True``, otherwise a shallow copy."""
        if in_place is True:
            return self
        return self.__copy__()

    #########################
    ######>> Copying <<######
    #########################

    def __deepcopy__(self, memo=None, _nil=[]):
        """
        Args:
            memo:
                Memo dictionary supplied by :py:func:`copy.deepcopy`.

            _nil:
                Unused; retained for signature compatibility.

        Returns:
            A deep copy of the current ``Partitioning``.
        """
        from copy import deepcopy

        _ends_copy = deepcopy(self._ends, memo)
        _names_copy = deepcopy(self._names, memo)

        current_class_const = type(self)
        return current_class_const(
            ends=_ends_copy,
            names=_names_copy,
        )

    def __copy__(self):
        """
        Returns:
            A shallow copy of the current ``Partitioning``.
        """
        current_class_const = type(self)
        return current_class_const(
            ends=self._ends,
            names=self._names,
        )

    def copy(self):
        """Alias for :py:meth:`~__copy__`."""
        return self.__copy__()

    ######################################
    ######>> length and iterators <<######
    ######################################

    def __len__(self) -> int:
        """Return the number of partitions."""
        return len(self._ends)

    def get_nobj(self) -> int:
        """Return the total number of objects across all partitions."""
        # Convert the NumPy scalar so that the result is a plain ``int``.
        return int(self._ends[-1]) if len(self._ends) > 0 else 0

    def nobj(self) -> int:
        """Alias for :py:meth:`~get_nobj`."""
        return self.get_nobj()

    def get_element_lengths(self) -> np.ndarray:
        """Return the lengths of each partition."""
        return self._ends - self._starts

    def element_lengths(self) -> np.ndarray:
        """Alias for :py:meth:`~get_element_lengths`."""
        return self.get_element_lengths()

    ##########################
    ######>> Printing <<######
    ##########################

    def __repr__(self) -> str:
        """
        Returns:
            A string representation.
        """
        output = f"{type(self).__name__}(number_of_elements={len(self)}"
        if self._names is not None:
            output += ", names=" + ut.print_truncated_list(self._names)
        output += ")"

        return output

    def __str__(self) -> str:
        """
        Returns:
            A pretty-printed string containing the contents of this object.
        """
        output = f"class: {type(self).__name__}\n"
        output += f"num of elements: ({len(self)})\n"
        output += f"names({0 if self._names is None else len(self._names)}): {' ' if self._names is None else ut.print_truncated_list(self._names)}\n"

        return output

    ##########################
    ######>> accessors <<#####
    ##########################

    def get_partition_range(self, i: int) -> tuple:
        """Get the start and end indices for partition ``i``.

        Args:
            i:
                Zero-based partition index; negative values are rejected.

        Returns:
            Tuple of (start, end) for the partition.

        Raises:
            IndexError: If ``i`` is negative or not less than the number
                of partitions.
        """
        if i < 0 or i >= len(self):
            raise IndexError(f"Partition index {i} out of range.")
        return (self._starts[i], self._ends[i])

    def __getitem__(self, key: Union[int, slice]) -> Union[tuple, List[tuple]]:
        """Get partition range(s) by index or slice.

        Args:
            key:
                Integer index (Python or NumPy integer; negative values
                count from the end) or slice.

        Returns:
            Tuple of (start, end) or list of such tuples.

        Raises:
            IndexError: If an integer ``key`` is out of range.
            TypeError: If ``key`` is neither an integer nor a slice.
        """
        if isinstance(key, (int, np.integer)):
            size = len(self)
            position = int(key)
            if position < -size or position >= size:
                raise IndexError(f"Partition index {key} out of range.")
            if position < 0:
                # Normalize negative indices the way built-in sequences do.
                position += size
            return self.get_partition_range(position)

        if isinstance(key, slice):
            indices = range(*key.indices(len(self)))
            return [self.get_partition_range(i) for i in indices]

        raise TypeError("Index must be 'int' or 'slice'.")

    ######################
    ######>> names <<#####
    ######################

    def get_names(self) -> Optional["ut.Names"]:
        """Return the names of each partition, or ``None`` if unnamed."""
        return self._names

    def set_names(self, names: Optional[List[str]], in_place: bool = False) -> "Partitioning":
        """Set the names of list elements.

        Args:
            names:
                New names, same as the number of elements.

                May be `None` to remove names.

            in_place:
                Whether to modify the ``Partitioning`` in place.

        Returns:
            A modified ``Partitioning`` object, either as a copy of the original
            or as a reference to the (in-place-modified) original.

        Raises:
            ValueError: If the number of names does not match the number
                of partitions.
        """
        if names is not None:
            if not isinstance(names, ut.Names):
                names = ut.Names(names)
            _validate_names(names, len(self._ends))

        output = self._define_output(in_place)
        output._names = names
        return output

    @property
    def names(self) -> Optional["ut.Names"]:
        """Alias for :py:meth:`~get_names`, provided for back-compatibility."""
        return self.get_names()

    @names.setter
    def names(self, names: Optional[List[str]]):
        """Alias for :py:meth:`~set_names` with ``in_place = True``.

        As this mutates the original object, a warning is raised.
        """
        warn(
            "Setting property 'names' is an in-place operation, use 'set_names' instead",
            UserWarning,
        )
        self.set_names(names, in_place=True)

    #####################
    ######>> ends <<#####
    #####################

    def get_ends(self) -> np.ndarray:
        """Return the end position of each partition."""
        return self._ends

    @property
    def ends(self) -> np.ndarray:
        """Alias for :py:meth:`~get_ends`, provided for back-compatibility."""
        return self.get_ends()

    #######################
    ######>> starts <<#####
    #######################

    def get_starts(self) -> np.ndarray:
        """Return the start position of each partition."""
        return self._starts

    @property
    def starts(self) -> np.ndarray:
        """Alias for :py:meth:`~get_starts`, provided for back-compatibility."""
        return self.get_starts()