# Source code for genomicranges.io.ucsc

from typing import Literal

from .gtf import parse_gtf

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def access_gtf_ucsc(
    genome: str,
    type: Literal["refGene", "ensGene", "knownGene", "ncbiRefSeq"] = "refGene",
) -> str:
    """Generate a path to a genome gtf file from UCSC, e.g. for `hg19 genome
    <http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/genes/>`_.

    The returned URI has the form
    ``http://hgdownload.cse.ucsc.edu/goldenPath/<genome>/bigZips/genes/<genome>.<type>.gtf.gz``.
    No network access happens here; only the string is built.

    Args:
        genome:
            Genome shortcode; e.g. hg19, hg38, mm10 etc.

        type:
            One of "refGene", "ensGene", "knownGene" or "ncbiRefSeq".
            Defaults to "refGene".

    Raises:
        ValueError:
            When ``type`` does not match with a valid input.

    Returns:
        The URI to the file.
    """
    # NOTE: ``type`` shadows the builtin, but the name is part of the public
    # keyword interface, so it is kept as is.
    if type not in ("refGene", "ensGene", "knownGene", "ncbiRefSeq"):
        raise ValueError(
            f"type must be one of refGene, ensGene, knownGene or ncbiRefSeq, provided {type}"
        )

    base_path = f"http://hgdownload.cse.ucsc.edu/goldenPath/{genome}/bigZips/genes/"

    # ``base_path`` already ends with "/", so the file name is appended
    # directly; adding another separator would produce "genes//<file>".
    return f"{base_path}{genome}.{type}.gtf.gz"
def read_ucsc(
    genome: str,
    type: Literal["refGene", "ensGene", "knownGene", "ncbiRefSeq"] = "refGene",
) -> "GenomicRanges":
    """Load a genome annotation from UCSC as
    :py:class:`~genomicranges.GenomicRanges.GenomicRanges`.

    The GTF location is resolved with :py:func:`access_gtf_ucsc`, parsed with
    ``parse_gtf`` (as a compressed file), and the parsed result is converted
    through ``GenomicRanges.from_pandas``.

    Args:
        genome:
            Genome shortcode; e.g. hg19, hg38, mm10 etc.

        type:
            Gene annotation track to load. Defaults to "refGene".

    Raises:
        ValueError:
            Propagated from :py:func:`access_gtf_ucsc` when ``type`` is not
            one of the supported values.

    Returns:
        The gene model from UCSC.
    """
    gtf_uri = access_gtf_ucsc(genome, type=type)

    # UCSC serves these annotations as ``.gtf.gz``, hence compressed=True.
    annotation = parse_gtf(gtf_uri, compressed=True)

    # Imported at call time rather than module level
    # (presumably to avoid a circular import; confirm before moving).
    from ..GenomicRanges import GenomicRanges

    return GenomicRanges.from_pandas(annotation)