def read_tenx_mtx(path: str) -> SingleCellExperiment:
    """Read 10X Matrix market directory as
    :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`.

    Args:
        path:
            Path to 10X MTX directory.
            Directory must contain `matrix.mtx`, and optionally
            a `genes.tsv` to represent features and `barcodes.tsv` for cell
            annotations.

    Returns:
        A single-cell experiment object. The matrix is stored as a CSR sparse
        matrix in the ``counts`` assay. Row/column annotations are only
        attached when the corresponding TSV file exists in the directory.
    """
    import os

    import pandas as pd
    from scipy.io import mmread
    from scipy.sparse import csr_matrix

    # mmread returns a COO matrix for sparse input; convert once to CSR.
    counts = csr_matrix(mmread(os.path.join(path, "matrix.mtx")))

    # The annotation files are documented as optional, so only read them
    # when they are actually present instead of failing on a missing file.
    row_data = None
    genes_path = os.path.join(path, "genes.tsv")
    if os.path.isfile(genes_path):
        genes = pd.read_csv(genes_path, header=None, sep="\t")
        genes.columns = ["gene_ids", "gene_symbols"]
        row_data = from_pandas(genes)

    column_data = None
    barcodes_path = os.path.join(path, "barcodes.tsv")
    if os.path.isfile(barcodes_path):
        cells = pd.read_csv(barcodes_path, header=None, sep="\t")
        cells.columns = ["barcode"]
        column_data = from_pandas(cells)

    return SingleCellExperiment(
        assays={"counts": counts},
        row_data=row_data,
        column_data=column_data,
    )
def read_tenx_h5(path: str, realize_assays: bool = False) -> SingleCellExperiment:
    """Read 10X H5 file as
    :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`.

    Note:
        Currently only supports version 3 of the 10X H5 format.

    Args:
        path:
            Path to 10x H5 file.

        realize_assays:
            Whether to realize assays into memory.
            Defaults to False.

    Raises:
        ValueError:
            If the file has no top-level ``matrix`` group.

    Returns:
        A single-cell experiment object.
    """
    import delayedarray as da
    import h5py
    from hdf5array import Hdf5CompressedSparseMatrix

    features = None
    barcodes = None
    ignore_list = []

    # The context manager guarantees the handle is closed even when the
    # format check or any of the reads below raises.
    with h5py.File(path, mode="r") as h5:
        if "matrix" not in h5:
            raise ValueError(f"H5 file ({path}) is not a 10X V3 format.")

        matrix_group = h5["matrix"]

        # read the matrix
        shape = tuple(matrix_group["shape"][:])
        counts = Hdf5CompressedSparseMatrix(
            path=path, group_name="matrix", by_column=True, shape=shape
        )
        # Truthiness check so that e.g. numpy booleans are honoured too.
        if realize_assays:
            counts = da.to_scipy_sparse_matrix(counts, "csr")

        num_rows, num_cols = counts.shape[0], counts.shape[1]

        # read features
        if "features" in matrix_group:
            feature_columns = {}
            for key, val in matrix_group["features"].items():
                decoded = [x.decode("ascii") for x in val]
                # Only keep entries that line up with the matrix rows;
                # anything else is reported in the warning below.
                if len(decoded) != num_rows:
                    ignore_list.append(key)
                else:
                    feature_columns[key] = decoded

            features = BiocFrame(feature_columns, number_of_rows=num_rows)

        # read barcodes
        if "barcodes" in matrix_group:
            barcode_columns = {
                "barcodes": [x.decode("ascii") for x in matrix_group["barcodes"]]
            }
            barcodes = BiocFrame(barcode_columns, number_of_rows=num_cols)

    if len(ignore_list) > 0:
        warn(
            f"These columns from h5 are ignored - {', '.join(ignore_list)} because of "
            "inconsistent length with the count matrix."
        )

    return SingleCellExperiment(
        assays={"counts": counts}, row_data=features, column_data=barcodes
    )