class Hdf5DenseArraySeed:
    """HDF5-backed dataset as a ``DelayedArray`` dense array seed.

    The file is opened only briefly in the constructor to read the dataset's
    type, shape and chunk layout; no handle is kept open afterwards.
    """

    def __init__(
        self,
        path: str,
        name: str,
        dtype: Optional[dtype] = None,
        native_order: bool = False,
    ) -> None:
        """
        Args:
            path:
                Path to the HDF5 file.

            name:
                Name of the dataset containing the array.

            dtype:
                NumPy type of the data. Defaults to the HDF5 type on disk;
                otherwise, values are transformed to ``dtype`` during
                extraction.

            native_order:
                Whether to use HDF5's native order of dimensions. HDF5 orders
                dimensions by slowest to fastest changing. If
                ``native_order`` is True, the same ordering is used for this
                array, i.e., this array's shape is the same as that reported
                in the file, equivalent to C storage order. If False, this
                array's shape is reversed compared to that reported in the
                file, equivalent to Fortran storage order. In this case, the
                first dimension in this array will be the fastest changing
                one, etc.
        """
        self._path = path
        self._name = name
        self._native_order = native_order

        with File(self._path, "r") as handle:
            dset = handle[name]

            # Only remember a requested dtype if it actually differs from the
            # on-disk type, so extraction can skip a pointless conversion.
            self._modify_dtype = dtype is not None and dtype != dset.dtype
            if not self._modify_dtype:
                dtype = dset.dtype
            self._dtype = dtype

            if native_order:
                self._shape = dset.shape
            else:
                self._shape = tuple(reversed(dset.shape))

            if dset.chunks is not None:
                if native_order:
                    self._chunks = dset.chunks
                else:
                    self._chunks = tuple(reversed(dset.chunks))
            else:
                # The dataset reports no chunk layout, so make one up.
                self._chunks = self._contiguous_chunks(self._shape, native_order)

    @property
    def dtype(self) -> dtype:
        """
        Returns:
            NumPy type of this array.
        """
        return self._dtype

    @property
    def shape(self) -> Tuple[int, ...]:
        """
        Returns:
            Tuple containing the dimensions of this array.
        """
        return self._shape

    @property
    def path(self) -> str:
        """
        Returns:
            Path to the HDF5 file.
        """
        return self._path

    @property
    def name(self) -> str:
        """
        Returns:
            Name of the dataset inside the file.
        """
        return self._name

    @staticmethod
    def _contiguous_chunks(
        shape: Tuple[int, ...], native_order: bool
    ) -> Tuple[int, ...]:
        """Build the chunk shape used when the dataset reports no chunking.

        Every dimension gets a chunk extent of 1 except the fastest-changing
        one, which spans its full extent. That is the last dimension when
        ``native_order`` is True and the first one otherwise.

        Args:
            shape:
                Shape of the array, already oriented according to
                ``native_order``.

            native_order:
                Whether ``shape`` follows HDF5's native dimension order.

        Returns:
            Tuple of chunk extents with the same length as ``shape``. This is
            empty for a zero-dimensional dataset.
        """
        sizes = [1] * len(shape)
        # A zero-dimensional dataset has no dimension to expand; indexing the
        # empty list would raise an IndexError.
        if sizes:
            fastest = -1 if native_order else 0
            sizes[fastest] = shape[fastest]
        return tuple(sizes)
@chunk_grid.register
def chunk_grid_Hdf5DenseArraySeed(x: Hdf5DenseArraySeed):
    """See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.

    The grid is built from the seed's stored chunk shape and array shape. A
    cost factor of 20 is used to reflect the computational work involved in
    extracting data from disk.
    """
    chunk_dims = x._chunks
    array_dims = x._shape
    return chunk_shape_to_grid(chunk_dims, array_dims, cost_factor=20)
@extract_dense_array.register
def extract_dense_array_Hdf5DenseArraySeed(
    x: Hdf5DenseArraySeed, subset: Tuple[Sequence[int], ...]
) -> numpy.ndarray:
    """See :py:meth:`~delayedarray.extract_dense_array.extract_dense_array`.

    Args:
        x:
            Seed describing the HDF5 dataset to read from.

        subset:
            One sequence of indices per dimension of ``x``. ``range`` entries
            are turned into slices. Other entries are used as index lists and
            are assumed to be sorted in increasing order, as their first and
            last elements are taken as the minimum and maximum.

    Returns:
        Fortran-contiguous NumPy array holding the requested subset, cast to
        the seed's ``dtype`` if that differs from the type on disk.
    """
    converted = []
    num_lists = 0
    for s in subset:
        if isinstance(s, range):
            # Convert back to slice for HDF5 access efficiency.
            converted.append(slice(s.start, s.stop, s.step))
        else:
            num_lists += 1
            converted.append(s)

    # Currently h5py doesn't support indexing with multiple lists at once.
    # So at most one dimension keeps its index list; every other list is
    # widened to the slice covering its range, and the exact indices are
    # re-applied in memory afterwards.
    reextract = None
    if num_lists > 1:
        # Keep the list with the smallest (span / count) ratio, provided that
        # ratio is below 1. If none qualifies, dimension 0 stays "chosen";
        # when that dimension is a slice, every list gets widened.
        lowest_density = 1
        chosen = 0
        for i, s in enumerate(converted):
            if not isinstance(s, slice) and len(s):
                lowest = s[0]
                highest = s[-1]
                current_density = (highest - lowest) / len(s)
                if lowest_density > current_density:
                    lowest_density = current_density
                    chosen = i

        reextract = []
        for i, s in enumerate(converted):
            if isinstance(s, slice) or i == chosen:
                # Already exact on disk: keep everything along this dimension.
                reextract.append(range(len(subset[i])))
            elif len(s) == 0:
                # Nothing requested along this dimension; there is no first or
                # last element to build a covering slice from.
                converted[i] = slice(0, 0)
                reextract.append([])
            else:
                lowest = s[0]
                highest = s[-1]
                converted[i] = slice(lowest, highest + 1)
                # Positions of the requested indices within the widened slice.
                reextract.append([j - lowest for j in s])

    # Re-opening the handle as needed, so as to avoid
    # blocking other applications that need this file.
    with File(x._path, "r") as handle:
        dset = handle[x._name]
        if x._native_order:
            out = dset[(*converted,)]
        else:
            # The file stores dimensions in the opposite order to this array.
            converted.reverse()
            out = dset[(*converted,)].T

    if reextract is not None:
        out = out[ix_(*reextract)]

    # Making other transformations for consistency.
    if x._modify_dtype:
        out = out.astype(x._dtype, copy=False)
    if not out.flags.f_contiguous:
        out = asfortranarray(out)
    return out
class Hdf5DenseArray(DelayedArray):
    """HDF5-backed dataset as a ``DelayedArray`` dense array.

    This subclass allows developers to implement custom methods for
    HDF5-backed arrays.
    """

    def __init__(self, path: str, name: str, **kwargs):
        """To construct a ``Hdf5DenseArray`` from an existing
        :py:class:`~Hdf5DenseArraySeed`, use :py:meth:`~delayedarray.wrap.wrap`
        instead.

        Args:
            path:
                Path to the HDF5 file. If an existing
                :py:class:`~Hdf5DenseArraySeed` is supplied here, it is used
                directly as the seed and ``name`` and ``kwargs`` are ignored.

            name:
                Name of the dataset containing the array.

            kwargs:
                Further arguments to pass to the
                :py:class:`~Hdf5DenseArraySeed` constructor.
        """
        already_seed = isinstance(path, Hdf5DenseArraySeed)
        seed = path if already_seed else Hdf5DenseArraySeed(path, name, **kwargs)
        super().__init__(seed)

    @property
    def path(self) -> str:
        """
        Returns:
            Path to the HDF5 file.
        """
        underlying = self.seed
        return underlying.path

    @property
    def name(self) -> str:
        """
        Returns:
            Name of the dataset inside the file.
        """
        underlying = self.seed
        return underlying.name