Source code for delayedarray.to_scipy_sparse_matrix
import numpy
from functools import singledispatch
from typing import Any, Literal
from biocutils.package_utils import is_package_installed

from .SparseNdarray import SparseNdarray
from .to_sparse_array import to_sparse_array


if is_package_installed("scipy"):
    import scipy.sparse

    def _to_csc(x: Any) -> scipy.sparse.csc_matrix:
        """Build a compressed sparse column matrix from ``x``.

        Args:
            x:
                A 2-dimensional sparse object exposing ``shape``, ``dtype``,
                ``index_dtype`` and ``contents``. ``contents`` is either None
                or a sequence with one entry per column, where each entry is
                None or a pair of ``(indices, values)``.

        Returns:
            A ``csc_matrix`` with the same shape as ``x``.
        """
        all_indptrs = numpy.zeros(x.shape[1] + 1, dtype=numpy.uint64)
        index_chunks = []
        value_chunks = []

        if x.contents is not None:
            counter = 0
            for i, y in enumerate(x.contents):
                if y is not None:
                    counter += len(y[0])
                    index_chunks.append(y[0])
                    value_chunks.append(y[1])
                # Recorded for every column, including the empty ones, so
                # that the pointers remain a running total.
                all_indptrs[i + 1] = counter

        if index_chunks:
            all_indices = numpy.concatenate(index_chunks)
            all_values = numpy.concatenate(value_chunks)
        else:
            # numpy.concatenate() rejects an empty list, so both "no
            # contents" and "every column is None" fall back to
            # zero-length arrays of the right types.
            all_indices = numpy.zeros(0, dtype=x.index_dtype)
            all_values = numpy.zeros(0, dtype=x.dtype)

        return scipy.sparse.csc_matrix((all_values, all_indices, all_indptrs), shape=x.shape)

    def _to_csr(x: Any) -> scipy.sparse.csr_matrix:
        """Build a compressed sparse row matrix from ``x``.

        Args:
            x:
                A 2-dimensional sparse object exposing ``shape``, ``dtype``,
                ``index_dtype`` and ``contents``. ``contents`` is either None
                or a sequence with one entry per column, where each entry is
                None or a pair of ``(indices, values)``.

        Returns:
            A ``csr_matrix`` with the same shape as ``x``.
        """
        all_indptrs = numpy.zeros(x.shape[0] + 1, dtype=numpy.uint64)

        if x.contents is None:
            all_indices = numpy.zeros(0, dtype=x.index_dtype)
            all_values = numpy.zeros(0, dtype=x.dtype)
        else:
            # First pass (in memory) to count the number of entries per row.
            for y in x.contents:
                if y is not None:
                    for ix in y[0]:
                        # int() keeps the index arithmetic in Python
                        # integers regardless of the index dtype.
                        all_indptrs[int(ix) + 1] += 1

            # Turn the per-row counts into cumulative row pointers.
            all_indptrs = numpy.cumsum(all_indptrs, dtype=numpy.uint64)

            total = int(all_indptrs[-1])
            all_indices = numpy.empty(total, dtype=x.index_dtype)
            all_values = numpy.empty(total, dtype=x.dtype)

            # Second pass to fill the allocations that we just made. Each
            # row has its own write cursor, starting at that row's pointer.
            offsets = all_indptrs.copy()
            for i, y in enumerate(x.contents):
                if y is not None:
                    vals = y[1]
                    for j, ix in enumerate(y[0]):
                        row = int(ix)
                        o = int(offsets[row])
                        all_indices[o] = i
                        all_values[o] = vals[j]
                        offsets[row] += 1

        return scipy.sparse.csr_matrix((all_values, all_indices, all_indptrs), shape=x.shape)

    def _to_coo(x: Any) -> scipy.sparse.coo_matrix:
        """Build a coordinate-format sparse matrix from ``x``.

        Args:
            x:
                A 2-dimensional sparse object exposing ``shape``, ``dtype``,
                ``index_dtype`` and ``contents``. ``contents`` is either None
                or a sequence with one entry per column, where each entry is
                None or a pair of ``(indices, values)``.

        Returns:
            A ``coo_matrix`` with the same shape as ``x``.
        """
        # Treating missing contents as "no columns" lets a single code path
        # define the row, column and value arrays in every case.
        columns = x.contents if x.contents is not None else []

        # First pass (in memory) to obtain the total size.
        total_count = sum(len(y[0]) for y in columns if y is not None)

        all_rows = numpy.empty(total_count, dtype=x.index_dtype)
        all_cols = numpy.empty(total_count, dtype=numpy.uint64)
        all_values = numpy.empty(total_count, dtype=x.dtype)

        # Second pass to fill the allocations that we just made.
        counter = 0
        for i, y in enumerate(columns):
            if y is not None:
                end = counter + len(y[0])
                all_rows[counter:end] = y[0]
                all_cols[counter:end] = i
                all_values[counter:end] = y[1]
                counter = end

        return scipy.sparse.coo_matrix((all_values, (all_rows, all_cols)), shape=x.shape)
# NOTE(review): the return annotation is evaluated when this function is
# defined, so this definition presumably belongs under the same scipy
# availability guard as ``import scipy.sparse`` - confirm the indentation
# against the original file.
@singledispatch
def to_scipy_sparse_matrix(x: Any, format: Literal["coo", "csr", "csc"] = "csc") -> scipy.sparse.spmatrix:
    """
    Convert a 2-dimensional array into a SciPy sparse matrix.

    Args:
        x:
            Input matrix where
            :py:func:`~delayedarray.is_sparse.is_sparse` returns True and
            :py:func:`~delayedarray.is_masked.is_masked` returns False.

        format:
            Type of SciPy matrix to create - coordinate (coo), compressed
            sparse row (csr) or compressed sparse column (csc).

    Returns:
        A SciPy sparse matrix with the contents of ``x``.
    """
    # Block processing looks like it would save memory, but it would not:
    # every block has to be held in memory until the final concatenation
    # anyway. The only way around that is a two-pass scheme (one pass to
    # size the allocations, another to fill them), which is not worth it,
    # so everything is simply loaded into memory up-front.
    in_memory = to_sparse_array(x)
    return to_scipy_sparse_matrix_from_SparseNdarray(in_memory, format=format)