def read_tenx_mtx(path: str) -> SingleCellExperiment:
    """Read 10X Matrix market directory as :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`.

    Args:
        path:
            Path to 10X MTX directory.
            Directory must contain `matrix.mtx`, and optionally
            `genes.tsv` (CellRanger version 2) or `features.tsv`
            (CellRanger version 3 and later) to represent features and
            `barcodes.tsv` for cell annotations.

            If both feature files exist, `features.tsv` is used and a
            warning is issued. If an optional file is absent, the
            corresponding annotation (row or column data) is left unset.

    Returns:
        A single-cell experiment object whose ``"counts"`` assay holds the
        matrix as a SciPy CSR sparse matrix.
    """
    import pandas as pd
    from scipy.io import mmread
    from scipy.sparse import csr_matrix

    # mmread returns a COO matrix for sparse input; convert it to CSR.
    mat = csr_matrix(mmread(os.path.join(path, "matrix.mtx")))

    features_path = os.path.join(path, "features.tsv")
    genes_path = os.path.join(path, "genes.tsv")
    barcodes_path = os.path.join(path, "barcodes.tsv")

    has_features = os.path.exists(features_path)
    has_genes = os.path.exists(genes_path)

    if has_features and has_genes:
        warn(
            "Both 'features.tsv' and 'genes.tsv' files are present in the directory. "
            "Prioritizing 'features.tsv' for processing."
        )

    # Feature annotations are optional; only the first two columns
    # (identifier, symbol) are kept so extra columns cannot break the
    # two-name column assignment below.
    row_data = None
    if has_features or has_genes:
        annotation_path = features_path if has_features else genes_path
        genes = pd.read_csv(annotation_path, header=None, sep="\t", usecols=[0, 1])
        genes.columns = ["gene_ids", "gene_symbols"]
        row_data = from_pandas(genes)

    # Cell annotations are optional as well.
    column_data = None
    if os.path.exists(barcodes_path):
        cells = pd.read_csv(barcodes_path, header=None, sep="\t")
        cells.columns = ["barcode"]
        column_data = from_pandas(cells)

    return SingleCellExperiment(
        assays={"counts": mat},
        row_data=row_data,
        column_data=column_data,
    )
def read_tenx_h5(path: str, realize_assays: bool = False) -> SingleCellExperiment:
    """Read 10X H5 file as :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`.

    Note:
        Currently only supports version 3 of the 10X H5 format.

    Args:
        path:
            Path to 10x H5 file.

        realize_assays:
            Whether to realize assays into memory.
            Defaults to False, in which case the counts stay a file-backed
            ``Hdf5CompressedSparseMatrix``; otherwise they are converted to
            a SciPy CSR sparse matrix.

    Raises:
        ValueError:
            If the file has no top-level ``matrix`` group.

    Returns:
        A single-cell experiment object. Row data holds the entries of
        ``matrix/features`` whose length matches the number of rows of the
        count matrix (others are skipped with a warning); column data holds
        ``matrix/barcodes``. Either is ``None`` when absent from the file.
    """
    import delayedarray as da
    import h5py
    from hdf5array import Hdf5CompressedSparseMatrix

    ignore_list = []

    # The context manager guarantees the handle is released even when the
    # format check (or any read below) raises.
    with h5py.File(path, mode="r") as h5:
        if "matrix" not in h5:
            raise ValueError(f"H5 file ({path}) is not a 10X V3 format.")

        matrix_group = h5["matrix"]

        # read the matrix
        shape = tuple(matrix_group["shape"][:])
        counts = Hdf5CompressedSparseMatrix(path=path, group_name="matrix", by_column=True, shape=shape)
        if realize_assays:
            counts = da.to_scipy_sparse_matrix(counts, "csr")

        # read features
        features = None
        if "features" in matrix_group:
            feature_columns = {}
            for key, val in matrix_group["features"].items():
                decoded = [x.decode("ascii") for x in val]
                # Only per-feature columns can be attached to the rows.
                if len(decoded) != counts.shape[0]:
                    ignore_list.append(key)
                else:
                    feature_columns[key] = decoded

            features = BiocFrame(feature_columns, number_of_rows=counts.shape[0])

        # read barcodes
        barcodes = None
        if "barcodes" in matrix_group:
            barcode_column = {"barcodes": [x.decode("ascii") for x in matrix_group["barcodes"]]}
            barcodes = BiocFrame(barcode_column, number_of_rows=counts.shape[1])

    if ignore_list:
        warn(
            f"These columns from h5 are ignored - {', '.join(ignore_list)} because of "
            "inconsistent length with the count matrix."
        )

    return SingleCellExperiment(assays={"counts": counts}, row_data=features, column_data=barcodes)