Source code for summarizedexperiment.BaseSE

import warnings
from collections import OrderedDict, namedtuple
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
from warnings import warn

import biocframe
import biocutils as ut

from ._frameutils import _sanitize_frame
from .type_checks import is_matrix_like

__author__ = "jkanche, keviny2"
__copyright__ = "jkanche"
__license__ = "MIT"

SliceResult = namedtuple(
    "SlicerResult",
    [
        "rows",
        "columns",
        "assays",
        "row_names",
        "column_names",
        "row_indices",
        "col_indices",
    ],
)


def _guess_assay_shape(assays, rows, cols, row_names, col_names) -> tuple:
    _keys = list(assays.keys())
    if len(_keys) > 0:
        _first = _keys[0]
        return assays[_first].shape

    _r = 0
    if rows is not None:
        _r = rows.shape[0]
    elif row_names is not None:
        _r = len(row_names)

    _c = 0
    if cols is not None:
        _c = cols.shape[0]
    elif col_names is not None:
        _c = len(col_names)

    return (_r, _c)


def _validate_assays(assays, shape) -> tuple:
    if assays is None or not isinstance(assays, dict):  # or len(assays.keys()) == 0
        raise Exception("`assays` must be a dictionary and contain atleast one 2-dimensional matrix.")

    for asy, mat in assays.items():
        if not is_matrix_like(mat):
            raise TypeError(f"Assay: '{asy}' is not a supported matrix representation.")

        if len(mat.shape) > 2:
            raise ValueError(
                "Only 2-dimensional matrices are accepted, "
                f"provided {len(mat.shape)} dimensions for `assay`: '{asy}'."
            )

        if shape is None:
            shape = mat.shape
            continue

        if mat.shape != shape:
            raise ValueError(f"Assay: '{asy}' must be of shape '{shape}'" f" but provided '{mat.shape}'.")


def _validate_rows(rows, names, shape):
    if not isinstance(rows, biocframe.BiocFrame):
        raise TypeError("'row_data' is not a `BiocFrame` object.")

    if rows.shape[0] != shape[0]:
        raise ValueError(
            f"Number of features ('row_data') mismatch with number of rows in assays. Must be '{shape[0]}'"
            f" but provided '{rows.shape[0]}'."
        )

    if names is not None:
        if len(names) != shape[0]:
            raise ValueError(
                f"Length of 'row_names' mismatch with number of rows. Must be '{shape[0]}'"
                f" but provided '{len(names)}'."
            )

        if len(set(names)) != len(names):
            warn("'row_data' does not contain unique 'row_names'.", UserWarning)


def _validate_cols(cols, names, shape):
    if not isinstance(cols, biocframe.BiocFrame):
        raise TypeError("'column_data' is not a `BiocFrame` object.")

    if cols.shape[0] != shape[1]:
        raise ValueError(
            f"Number of samples ('column_data') mismatch with number of columns in assays. Must be '{shape[1]}'"
            f" but provided '{cols.shape[0]}'."
        )

    if names is not None:
        if len(names) != shape[1]:
            raise ValueError(
                f"Length of 'column_names' mismatch with number of columns. Must be '{shape[1]}'"
                f" but provided '{len(names)}'."
            )

        if len(set(names)) != len(names):
            warn("'column_data' does not contain unique 'row_names'.", UserWarning)


def _validate_metadata(metadata):
    if not isinstance(metadata, dict):
        raise TypeError("'metadata' should be a dictionary")


[docs] class BaseSE: """Base class for ``SummarizedExperiment``. This class provides common properties and methods that can be utilized across all derived classes. This container represents genomic experiment data in the form of ``assays``, features in ``row_data``, sample data in ``column_data``, and any other relevant ``metadata``. If row_names are not provided, the row_names from row_data are used as the experiment's row names. Similarly if column_names are not provided the row_names of the column_data are used as the experiment's column names. """
[docs] def __init__( self, assays: Dict[str, Any] = None, row_data: Optional[biocframe.BiocFrame] = None, column_data: Optional[biocframe.BiocFrame] = None, row_names: Optional[List[str]] = None, column_names: Optional[List[str]] = None, metadata: Optional[dict] = None, validate: bool = True, ) -> None: """Initialize an instance of ``BaseSE``. Args: assays: A dictionary containing matrices, with assay names as keys and 2-dimensional matrices represented as either :py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`. Alternatively, you may use any 2-dimensional matrix that has the ``shape`` property and implements the slice operation using the ``__getitem__`` dunder method. All matrices in assays must be 2-dimensional and have the same shape (number of rows, number of columns). row_data: Features, must be the same length as the number of rows of the matrices in assays. Feature information is coerced to a :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. column_data: Sample data, must be the same length as the number of columns of the matrices in assays. Sample information is coerced to a :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. row_names: A list of strings, same as the number of rows. If ``row_names`` are not provided, these are inferred from ``row_data``. Defaults to None. column_names: A list of string, same as the number of columns. if ``column_names`` are not provided, these are inferred from ``column_data``. Defaults to None. metadata: Additional experimental metadata describing the methods. Defaults to None. validate: Internal use only. """ self._assays = assays if assays is not None else {} self._shape = _guess_assay_shape(self._assays, row_data, column_data, row_names, column_names) if self._shape is None: raise RuntimeError("Failed to guess the 'shape' from the provided parameters!") self._rows = _sanitize_frame(row_data, self._shape[0]) self._cols = _sanitize_frame(column_data, self._shape[1]) if row_names is None: row_names = self._rows.row_names if row_names is not None and not isinstance(row_names, ut.Names): row_names = ut.Names(row_names) self._row_names = row_names if column_names is None: column_names = self._cols.row_names if column_names is not None and not isinstance(column_names, ut.Names): column_names = ut.Names(column_names) self._column_names = column_names self._metadata = metadata if metadata is not None else {} if validate: _validate_assays(self._assays, self._shape) if self._shape is None: raise RuntimeError("Cannot guess 'shape' from assays!") _validate_rows(self._rows, self._row_names, self._shape) _validate_cols(self._cols, self._column_names, self._shape) _validate_metadata(self._metadata)
def _define_output(self, in_place: bool = False) -> "BaseSE": if in_place is True: return self else: return self.__copy__() ######################### ######>> Copying <<###### #########################
[docs] def __deepcopy__(self, memo=None, _nil=[]): """ Returns: A deep copy of the current ``BaseSE``. """ from copy import deepcopy _assays_copy = deepcopy(self._assays) _rows_copy = deepcopy(self._rows) _cols_copy = deepcopy(self._cols) _metadata_copy = deepcopy(self.metadata) _row_names_copy = deepcopy(self._row_names) _col_names_copy = deepcopy(self._column_names) current_class_const = type(self) return current_class_const( assays=_assays_copy, row_data=_rows_copy, column_data=_cols_copy, row_names=_row_names_copy, column_names=_col_names_copy, metadata=_metadata_copy, )
[docs] def __copy__(self): """ Returns: A shallow copy of the current ``BaseSE``. """ current_class_const = type(self) return current_class_const( assays=self._assays, row_data=self._rows, column_data=self._cols, row_names=self._row_names, column_names=self._column_names, metadata=self._metadata, )
[docs] def copy(self): """Alias for :py:meth:`~__copy__`.""" return self.__copy__()
###################################### ######>> length and iterators <<###### ######################################
[docs] def __len__(self) -> int: """ Returns: Number of rows. """ return self.shape[0]
@property def shape(self) -> Tuple[int, int]: """Get shape of the experiment. Returns: Tuple[int, int]: A tuple (m,n), where `m` is the number of features/rows, and `n` is the number of samples/columns. """ return self._shape @property def dims(self) -> Tuple[int, int]: """Alias to :py:attr:`~summarizedexperiment.BaseSE.BaseSE.shape`. Returns: Tuple[int, int]: A tuple (m,n), where `m` is the number of features/rows, and `n` is the number of samples/columns. """ return self.shape ########################## ######>> Printing <<###### ##########################
[docs] def __repr__(self) -> str: """ Returns: A string representation. """ output = f"{type(self).__name__}(number_of_rows={self.shape[0]}" output += f", number_of_columns={self.shape[1]}" output += ", assays=" + ut.print_truncated_list(self.assay_names) output += ", row_data=" + self._rows.__repr__() output += ", column_data=" + self._cols.__repr__() if self._row_names is not None: output += ", row_names=" + ut.print_truncated_list(self._row_names) if self._column_names is not None: output += ", column_names=" + ut.print_truncated_list(self._column_names) if len(self._metadata) > 0: output += ", metadata=" + ut.print_truncated_dict(self._metadata) output += ")" return output
def __str__(self) -> str: """ Returns: A pretty-printed string containing the contents of this object. """ output = f"class: {type(self).__name__}\n" output += f"dimensions: ({self.shape[0]}, {self.shape[1]})\n" output += f"assays({len(self.assay_names)}): {ut.print_truncated_list(self.assay_names)}\n" output += ( f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" ) output += f"row_names({0 if self._row_names is None else len(self._row_names)}): {' ' if self._row_names is None else ut.print_truncated_list(self._row_names)}\n" output += ( f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" ) output += f"column_names({0 if self._column_names is None else len(self._column_names)}): {' ' if self._column_names is None else ut.print_truncated_list(self._column_names)}\n" output += f"metadata({str(len(self.metadata))}): {ut.print_truncated_list(list(self.metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n" return output ######################## ######>> assays <<###### ########################
[docs] def get_assays(self) -> Dict[str, Any]: """Access assays/experimental data. Returns: A dictionary with keys as assay names and value the experimental data. """ return self._assays
[docs] def set_assays(self, assays: Dict[str, Any], in_place: bool = False) -> "BaseSE": """Set new experiment data (assays). Args: assays: New assays. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ _validate_assays(assays, self._shape) output = self._define_output(in_place) output._assays = assays return output
@property def assays(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_assays`.""" return self.get_assays() @assays.setter def assays(self, assays: Dict[str, Any]): """Alias for :py:meth:`~set_assays` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'assays' is an in-place operation, use 'set_assays' instead", UserWarning, ) self.set_assays(assays, in_place=True) ########################## ######>> row_data <<###### ##########################
[docs] def get_row_data(self, replace_row_names: bool = True) -> biocframe.BiocFrame: """Get features, the `row_names` of row_data are replaced by the row_names from the experiment. Args: replace_row_names: Whether to replace `row_data`'s row_names with the row_names from the experiment. Defaults to True. Returns: Feature information. """ _row_copy = self._rows.copy() if replace_row_names: return _row_copy.set_row_names(self._row_names, in_place=False) return _row_copy
[docs] def set_row_data( self, rows: Optional[biocframe.BiocFrame], replace_row_names: bool = False, in_place: bool = False, ) -> "BaseSE": """Set new feature information. Args: rows: New feature information. If ``rows`` is None, an empty :py:class:`~biocframe.BiocFrame.BiocFrame` object is created. replace_row_names: Whether to replace experiment's row_names with the names from the new object. Defaults to False. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ rows = _sanitize_frame(rows, self._shape[0]) _validate_rows(rows, self._row_names, self._shape) output = self._define_output(in_place) output._rows = rows if replace_row_names: return output.set_row_names(rows._row_names, in_place=in_place) return output
@property def rowdata(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_rowdata`.""" return self.get_row_data() @rowdata.setter def rowdata(self, rows: Optional[biocframe.BiocFrame]): """Alias for :py:meth:`~set_rowdata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'rowdata' is an in-place operation, use 'set_rowdata' instead", UserWarning, ) self.set_row_data(rows, in_place=True) @property def row_data(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_rowdata`.""" return self.get_row_data() @row_data.setter def row_data(self, rows: Optional[biocframe.BiocFrame]): """Alias for :py:meth:`~set_rowdata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'rowdata' is an in-place operation, use 'set_rowdata' instead", UserWarning, ) self.set_row_data(rows, in_place=True) ########################## ######>> col_data <<###### ##########################
[docs] def get_column_data(self, replace_row_names: bool = True) -> biocframe.BiocFrame: """Get sample data. Args: replace_row_names: Whether to replace `column_data`'s row_names with the row_names from the experiment. Defaults to True. Returns: Sample information. """ _col_copy = self._cols.copy() if replace_row_names: return _col_copy.set_row_names(self._column_names, in_place=False) return _col_copy
[docs] def set_column_data( self, cols: Optional[biocframe.BiocFrame], replace_column_names: bool = False, in_place: bool = False, ) -> "BaseSE": """Set sample data. Args: cols: New sample data. If ``cols`` is None, an empty :py:class:`~biocframe.BiocFrame.BiocFrame` object is created. replace_column_names: Whether to replace experiment's column_names with the names from the new object. Defaults to False. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ cols = _sanitize_frame(cols, self._shape[1]) _validate_cols(cols, self._column_names, self._shape) output = self._define_output(in_place) output._cols = cols if replace_column_names: return output.set_column_names(cols.row_names, in_place=in_place) return output
@property def columndata(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_coldata`.""" return self.get_column_data() @columndata.setter def columndata(self, cols: Optional[biocframe.BiocFrame]): """Alias for :py:meth:`~set_coldata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'coldata' is an in-place operation, use 'set_columndata' instead", UserWarning, ) self.set_column_data(cols, in_place=True) @property def coldata(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_coldata`.""" return self.get_column_data() @coldata.setter def coldata(self, cols: Optional[biocframe.BiocFrame]): """Alias for :py:meth:`~set_coldata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'coldata' is an in-place operation, use 'set_columndata' instead", UserWarning, ) self.set_column_data(cols, in_place=True) @property def column_data(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_coldata`.""" return self.get_column_data() @column_data.setter def column_data(self, cols: Optional[biocframe.BiocFrame]): """Alias for :py:meth:`~set_coldata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'coldata' is an in-place operation, use 'set_coldata' instead", UserWarning, ) self.set_column_data(cols, in_place=True) @property def col_data(self) -> Dict[str, Any]: """Alias for :py:meth:`~get_coldata`.""" return self.get_column_data() @col_data.setter def col_data(self, cols: Optional[biocframe.BiocFrame]): """Alias for :py:meth:`~set_coldata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'coldata' is an in-place operation, use 'set_columndata' instead", UserWarning, ) self.set_column_data(cols, in_place=True) ########################## ######>> row names <<##### ##########################
[docs] def get_row_names(self) -> Optional[ut.Names]: """ Returns: List of row names, or None if no row names are available. """ return self._row_names
[docs] def set_row_names(self, names: Optional[List[str]], in_place: bool = False) -> "BaseSE": """Set new row names. Args: names: New names, same as the number of rows. May be `None` to remove row names. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if names is not None and not isinstance(names, ut.Names): names = ut.Names(names) _validate_rows(self._rows, names, self.shape) output = self._define_output(in_place) output._row_names = names return output
@property def rownames(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_row_names`, provided for back-compatibility.""" return self.get_row_names() @rownames.setter def rownames(self, names: Optional[List[str]]): """Alias for :py:meth:`~set_row_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'row_names' is an in-place operation, use 'set_row_names' instead", UserWarning, ) self.set_row_names(names, in_place=True) @property def row_names(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_row_names`, provided for back-compatibility.""" return self.get_row_names() @row_names.setter def row_names(self, names: Optional[List[str]]): """Alias for :py:meth:`~set_row_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'row_names' is an in-place operation, use 'set_row_names' instead", UserWarning, ) self.set_row_names(names, in_place=True) ############################# ######>> column names <<##### #############################
[docs] def get_column_names(self) -> Optional[ut.Names]: """ Returns: List of column names, or None if no column names are available. """ return self._column_names
[docs] def set_column_names(self, names: Optional[List[str]], in_place: bool = False) -> "BaseSE": """Set new column names. Args: names: New names, same as the number of columns. May be `None` to remove column names. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if names is not None and not isinstance(names, ut.Names): names = ut.Names(names) _validate_cols(self._cols, names, self.shape) output = self._define_output(in_place) output._column_names = names return output
@property def columnnames(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_column_names`, provided for back-compatibility.""" return self.get_column_names() @columnnames.setter def columnnames(self, names: Optional[List[str]]): """Alias for :py:meth:`~set_column_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'column_names' is an in-place operation, use 'set_column_names' instead", UserWarning, ) self.set_column_names(names, in_place=True) @property def colnames(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_column_names`, provided for back-compatibility.""" return self.get_column_names() @colnames.setter def colnames(self, names: Optional[List[str]]): """Alias for :py:meth:`~set_column_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'column_names' is an in-place operation, use 'set_column_names' instead", UserWarning, ) self.set_column_names(names, in_place=True) @property def col_names(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_column_names`, provided for back-compatibility.""" return self.get_column_names() @col_names.setter def col_names(self, names: Optional[List[str]]): """Alias for :py:meth:`~set_column_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'column_names' is an in-place operation, use 'set_column_names' instead", UserWarning, ) self.set_column_names(names, in_place=True) @property def column_names(self) -> Optional[ut.Names]: """Alias for :py:attr:`~get_column_names`, provided for back-compatibility.""" return self.get_column_names() @column_names.setter def column_names(self, names: Optional[List[str]]): """Alias for :py:meth:`~set_column_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'column_names' is an in-place operation, use 'set_column_names' instead", UserWarning, ) self.set_column_names(names, in_place=True) ########################### ######>> metadata <<####### ###########################
[docs] def get_metadata(self) -> dict: """ Returns: Dictionary of metadata for this object. """ return self._metadata
[docs] def set_metadata(self, metadata: dict, in_place: bool = False) -> "BaseSE": """Set additional metadata. Args: metadata: New metadata for this object. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if not isinstance(metadata, dict): raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.") output = self._define_output(in_place) output._metadata = metadata return output
@property def metadata(self) -> dict: """Alias for :py:attr:`~get_metadata`.""" return self.get_metadata() @metadata.setter def metadata(self, metadata: dict): """Alias for :py:attr:`~set_metadata` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'metadata' is an in-place operation, use 'set_metadata' instead", UserWarning, ) self.set_metadata(metadata, in_place=True) ############################# ######>> assay names <<###### #############################
[docs] def get_assay_names(self) -> List[str]: """Get assay names. Returns: List of assay names. """ return list(self.assays.keys())
[docs] def set_assay_names(self, names: List[str], in_place: bool = False) -> "BaseSE": """Replace :py:attr:`~summarizedexperiment.BaseSE.BaseSE.assays`'s names. Args: names: New names. in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ current_names = self.assay_names if len(names) != len(current_names): raise ValueError("Length of 'names' does not match the number of `assays`.") new_assays = OrderedDict() for idx in range(len(names)): new_assays[names[idx]] = self._assays.pop(current_names[idx]) output = self._define_output(in_place) output._assays = new_assays return output
@property def assay_names(self) -> List[str]: """Alias for :py:attr:`~get_assay_names`.""" return self.get_assay_names() @assay_names.setter def assay_names(self, names: List[str]): """Alias for :py:attr:`~set_assay_names` with ``in_place = True``. As this mutates the original object, a warning is raised. """ warn( "Setting property 'assay_names' is an in-place operation, use 'set_assay_names' instead", UserWarning, ) self.set_assay_names(names, in_place=True) ################################ ######>> assay getters <<####### ################################
[docs] def get_assay(self, assay: Union[int, str]) -> Any: """Convenience method to access an :py:attr:`~summarizedexperiment.BaseSE.BaseSE.assays` by name or index. Args: assay: Name or index position of the assay. Raises: AttributeError: If the assay name does not exist. IndexError: If index is greater than the number of assays. Returns: Experiment data. """ if isinstance(assay, int): if assay < 0: raise IndexError("Index cannot be negative.") if assay > len(self.get_assay_names()): raise IndexError("Index greater than the number of assays.") return self.assays[self.get_assay_names()[assay]] elif isinstance(assay, str): if assay not in self._assays: raise AttributeError(f"Assay: {assay} does not exist.") return self._assays[assay] raise TypeError(f"'assay' must be a string or integer, provided '{type(assay)}'.")
[docs] def assay(self, assay: Union[int, str]) -> Any: """Alias for :py:attr:`~assay`. For backwards compatibility""" return self.get_assay(assay)
[docs] def set_assay(self, name: str, assay: Any, in_place: bool = False) -> "BaseSE": """Add or replace :py:attr:`~summarizedexperiment.BaseSE.BaseSE.assays`'s. Args: name: New or existing assay name. assay: A 2-dimensional matrix represented as either :py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`. Alternatively, you may use any 2-dimensional matrix that has the ``shape`` property and implements the slice operation using the ``__getitem__`` dunder method. Dimensions of the matrix must match the shape of the current experiment (number of rows, number of columns). in_place: Whether to modify the ``BaseSE`` in place. Returns: A modified ``BaseSE`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ if assay.shape != self.shape: raise ValueError("Provided assay does not match the dimensions of the experiment.") output = self._define_output(in_place) if in_place is False: output._assays = output._assays.copy() output._assays[name] = assay return output
########################## ######>> slicers <<####### ########################## def _normalize_row_slice(self, rows: Union[str, int, bool, Sequence]): _scalar = None if not (isinstance(rows, slice) and rows == slice(None)): rows, _scalar = ut.normalize_subscript(rows, len(self._rows), self._row_names) return rows, _scalar def _normalize_column_slice(self, columns: Union[str, int, bool, Sequence]): _scalar = None if not (isinstance(columns, slice) and columns == slice(None)): columns, _scalar = ut.normalize_subscript(columns, len(self._cols), self._column_names) return columns, _scalar
[docs] def subset_assays( self, rows: Optional[Union[str, int, bool, Sequence]], columns: Optional[Union[str, int, bool, Sequence]], ) -> Dict[str, Any]: """Subset all assays by the slice defined by rows and columns. If both ``row_indices`` and ``col_indices`` are None, a shallow copy of the current assays is returned. Args: rows: Row indices to subset. Integer indices, a boolean filter, or (if the current object is named) names specifying the ranges to be extracted, see :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. columns: Column indices to subset. Integer indices, a boolean filter, or (if the current object is named) names specifying the ranges to be extracted, see :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. Returns: Sliced experiment data. """ if rows is None and columns is None: warnings.warn("No slice is provided, this returns a copy of all assays!") return self.assays.copy() if rows is None: rows = slice(None) if columns is None: columns = slice(None) rows, _ = self._normalize_row_slice(rows) columns, _ = self._normalize_column_slice(columns) new_assays = OrderedDict() for asy, mat in self.assays.items(): if not (isinstance(rows, slice) and rows == slice(None)): mat = mat[rows, :] if not (isinstance(columns, slice) and columns == slice(None)): mat = mat[:, columns] new_assays[asy] = mat return new_assays
def _generic_slice( self, rows: Optional[Union[str, int, bool, Sequence]], columns: Optional[Union[str, int, bool, Sequence]], ) -> SliceResult: """Slice ``SummarizedExperiment`` along the rows and/or columns, based on their indices or names. Args: rows: Rows to be extracted. Integer indices, a boolean filter, or (if the current object is named) names specifying the ranges to be extracted, see :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. columns: Columns to be extracted. Integer indices, a boolean filter, or (if the current object is named) names specifying the ranges to be extracted, see :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. Returns: The sliced tuple containing the new rows, columns, assays and realized indices for use in downstream methods. """ new_rows = self.row_data new_cols = self.column_data new_row_names = self._row_names new_col_names = self._column_names new_assays = {} if rows is None: rows = slice(None) if columns is None: columns = slice(None) if not (isinstance(rows, slice) and rows == slice(None)): rows, _ = self._normalize_row_slice(rows=rows) new_rows = ut.subset(new_rows, rows) if new_row_names is not None: new_row_names = ut.subset_sequence(new_row_names, rows) if not (isinstance(columns, slice) and columns == slice(None)) and self.column_data is not None: columns, _ = self._normalize_column_slice(columns=columns) new_cols = ut.subset(new_cols, columns) if new_col_names is not None: new_col_names = ut.subset_sequence(new_col_names, columns) new_assays = self.subset_assays(rows=rows, columns=columns) return SliceResult(new_rows, new_cols, new_assays, new_row_names, new_col_names, rows, columns)
[docs] def get_slice( self, rows: Optional[Union[str, int, bool, Sequence]], columns: Optional[Union[str, int, bool, Sequence]], ) -> "BaseSE": """Alias for :py:attr:`~__getitem__`, for back-compatibility.""" slicer = self._generic_slice(rows=rows, columns=columns) current_class_const = type(self) return current_class_const( assays=slicer.assays, row_data=slicer.rows, column_data=slicer.columns, row_names=slicer.row_names, column_names=slicer.column_names, metadata=self._metadata, )
[docs] def __getitem__( self, args: Union[int, str, Sequence, tuple], ) -> "BaseSE": """Subset a ``SummarizedExperiment``. Args: args: Integer indices, a boolean filter, or (if the current object is named) names specifying the ranges to be extracted, see :py:meth:`~biocutils.normalize_subscript.normalize_subscript`. Alternatively a tuple of length 1. The first entry specifies the rows to retain based on their names or indices. Alternatively a tuple of length 2. The first entry specifies the rows to retain, while the second entry specifies the columns to retain, based on their names or indices. Raises: ValueError: If too many or too few slices provided. Returns: Same type as caller with the sliced rows and columns. """ if isinstance(args, (str, int)): return self.get_slice(args, slice(None)) if isinstance(args, tuple): if len(args) == 0: raise ValueError("At least one slicing argument must be provided.") if len(args) == 1: return self.get_slice(args[0], slice(None)) elif len(args) == 2: return self.get_slice(args[0], args[1]) else: raise ValueError(f"`{type(self).__name__}` only supports 2-dimensional slicing.") raise TypeError("args must be a sequence or a scalar integer or string or a tuple of atmost 2 values.")
################################ ######>> AnnData interop <<##### ################################
[docs] def to_anndata(self): """Transform :py:class:`~BaseSE`-like into a :py:class:`~anndata.AnnData` representation. Returns: An ``AnnData`` representation of the experiment. """ from anndata import AnnData from delayedarray import ( DelayedArray, is_sparse, to_dense_array, to_scipy_sparse_matrix, ) layers = OrderedDict() for asy, mat in self.assays.items(): if isinstance(mat, DelayedArray) or issubclass(type(mat), DelayedArray): if is_sparse(mat): warnings.warn( "Converting delayedarray into sparse, may require more memory", RuntimeWarning, ) mat = to_scipy_sparse_matrix(mat) else: warnings.warn( "Converting delayedarray into dense, may require more memory", RuntimeWarning, ) mat = to_dense_array(mat) layers[asy] = mat.transpose() trows = self._rows.to_pandas() if self._row_names is not None: trows.index = self._row_names tcols = self._cols.to_pandas() if self._column_names is not None: tcols.index = self._column_names if tcols.empty: tcols.index = range(self._shape[1]) obj = AnnData( obs=tcols, var=trows, uns=self.metadata, layers=layers, ) return obj