from typing import Any, Optional, Sequence, Tuple, Union
import numpy
def _raise_int(idx: int, length):
raise IndexError(
"subscript ("
+ str(idx)
+ ") out of range for vector-like object of length "
+ str(length)
)
def _is_scalar_bool(sub):
return isinstance(sub, bool) or isinstance(sub, numpy.bool_)
[docs]
class NormalizedSubscript:
"""
Subscript normalized by :py:func:`~normalize_subscript`. This
is used to indicate that no further normalization is required,
such that :py:func:`~normalize_subscript` is just a no-op.
"""
def __init__(self, subscript: Sequence[int]):
"""
Args:
subscript:
Sequence of integers for a normalized subscript.
"""
self._subscript = subscript
@property
def subscript(self) -> Sequence[int]:
"""
Returns:
The subscript, as a sequence of integer positions.
"""
return self._subscript
[docs]
def __getitem__(self, index: Any) -> Any:
"""
Args:
index:
Any argument accepted by the ``__getitem__`` method of the
:py:attr:`~subscript`.
Returns:
The same return value as the ``__getitem__`` method of the
subscript. This should be an integer if ``index`` is an integer.
"""
return self._subscript[index]
[docs]
def __len__(self) -> int:
"""
Returns:
Length of the subscript.
"""
return len(self._subscript)
SubscriptTypes = Union[slice, range, Sequence, int, str, bool, NormalizedSubscript]
[docs]
def normalize_subscript(
sub: SubscriptTypes,
length: int,
names: Optional[Sequence[str]] = None,
non_negative_only: bool = True,
) -> Tuple:
"""
Normalize a subscript for ``__getitem__`` or friends into a sequence of
integer indices, for consistent downstream use.
Args:
sub:
The subscript. This can be any of the following:
- A slice.
- A range containing indices to elements. Negative values are
allowed. An error is raised if the indices are out of range.
- A single integer specifying the index of an element. A negative
value is allowed. An error is raised if the index is out of range.
- A single string that can be found in ``names``, which is
converted to the index of the first occurrence of that string in
``names``. An error is raised if the string cannot be found.
- A single boolean, which is converted into a list containing the
first element if true, and an empty list if false.
- A sequence of strings, integers and/or booleans. Strings are
converted to indices based on first occurrence in ``names``,
as described above. Integers should be indices to an element.
Each truthy boolean is converted to an index equal to its
position in ``sub``, and each Falsey boolean is ignored.
- A :py:class:`~NormalizedSubscript`, in which case the
``subscript`` property is directly returned.
length:
Length of the object.
names:
List of names for each entry in the object. If not None, this
should have length equal to ``length``. Some optimizations
are possible if this is a :py:class:`~Names.Names` object.
non_negative_only:
Whether negative indices must be converted into non-negative
equivalents. Setting this to `False` may improve efficiency.
Returns:
A tuple containing (i) a sequence of integer indices in ``[0, length)``
specifying the subscript elements, and (ii) a boolean indicating whether
``sub`` was a scalar.
"""
if isinstance(sub, NormalizedSubscript):
return sub.subscript, False
if _is_scalar_bool(sub): # before ints, as bools are ints.
if sub:
return [0], True
else:
return [], False
if isinstance(sub, int) or isinstance(sub, numpy.integer):
if sub < -length or sub >= length:
_raise_int(sub, length)
if sub < 0 and non_negative_only:
sub += length
return [int(sub)], True
if isinstance(sub, str):
if names is None:
raise IndexError(
"failed to find subscript '"
+ sub
+ "' for vector-like object with no names"
)
i = -1
from .Names import Names
if isinstance(names, Names):
i = names.map(sub)
else:
for j, n in enumerate(names):
if n == sub:
i = j
break
if i < 0:
raise IndexError("cannot find subscript '" + sub + "' in the names")
return [i], True
if isinstance(sub, slice):
return range(*sub.indices(length)), False
if isinstance(sub, range):
if len(sub) == 0:
return [], False
first = sub[0]
last = sub[-1]
if first >= length:
_raise_int(first, length)
if last >= length:
_raise_int(last, length)
if first < -length:
_raise_int(first, length)
if last < -length:
_raise_int(last, length)
if not non_negative_only:
return sub, False
else:
if sub.start < 0:
if sub.stop < 0:
return range(length + sub.start, length + sub.stop, sub.step), False
else:
return [(x < 0) * length + x for x in sub], False
else:
if sub.stop < 0:
return [(x < 0) * length + x for x in sub], False
else:
return sub, False
can_return_early = True
for x in sub:
if isinstance(x, str) or _is_scalar_bool(x) or (x < 0 and non_negative_only):
can_return_early = False
break
if can_return_early:
for x in sub:
if x >= length or x < -length:
_raise_int(x, length)
return sub, False
output = []
has_strings = set()
string_positions = []
from .Names import Names
are_names_indexed = isinstance(names, Names)
for i, x in enumerate(sub):
if isinstance(x, str):
if are_names_indexed:
i = names.map(x)
if i < 0:
raise IndexError("cannot find subscript '" + x + "' in the names")
output.append(i)
else:
has_strings.add(x)
string_positions.append(len(output))
output.append(None)
elif _is_scalar_bool(x):
if x:
output.append(i)
elif x < 0:
if x < -length:
_raise_int(x, length)
output.append(int(x) + length)
else:
if x >= length:
_raise_int(x, length)
output.append(int(x))
if len(has_strings):
if names is None:
raise IndexError(
"cannot find string subscripts for vector-like object with no names"
)
mapping = {}
for i, y in enumerate(names):
if y in has_strings:
mapping[y] = i
has_strings.remove(y) # remove it so we only consider the first.
for i in string_positions:
output[i] = mapping[sub[i]]
return output, False