Source code for biocutils.union
from typing import Sequence
from .is_missing_scalar import is_missing_scalar
from .map_to_index import DUPLICATE_METHOD
[docs]
def union(*x: Sequence, duplicate_method: DUPLICATE_METHOD = "first") -> list:
"""
Identify the union of values in multiple sequences, while preserving the
order of the first (or last) occurence of each value.
Args:
x:
Zero, one or more sequences of interest containing hashable values.
We ignore missing values as defined by
:py:meth:`~biocutils.is_missing_scalar.is_missing_scalar`.
duplicate_method:
Whether to take the first or last occurrence of each value in the
ordering of the output. If first, the first occurrence in the
earliest sequence of ``x`` is reported; if last, the last
occurrence in the latest sequence of ``x`` is reported.
Returns:
Union of values across all ``x``.
"""
nargs = len(x)
if nargs == 0:
return []
output = []
present = set()
def handler(f):
if not is_missing_scalar(f) and f not in present:
output.append(f)
present.add(f)
if duplicate_method == "first":
for a in x:
for f in a:
handler(f)
else:
for a in reversed(x):
for f in reversed(a):
handler(f)
output.reverse()
return output