Source code for orgdb.record
from __future__ import annotations
from dataclasses import dataclass
from datetime import date, datetime
from typing import Optional
__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
[docs]
@dataclass(frozen=True)
class OrgDbRecord:
"""Container for a single OrgDb entry."""
orgdb_id: str
release_date: Optional[date]
url: str
species: Optional[str] = None # e.g. "Hs" or "Hsapiens"
id_type: Optional[str] = None # e.g. "eg" (Entrez Gene) or "tair"
bioc_version: Optional[str] = None
[docs]
@classmethod
def from_config_entry(cls, orgdb_id: str, entry: dict) -> "OrgDbRecord":
"""Build a record from a ORGDB_CONFIG entry:
{
"release_date": "YYYY-MM-DD", # optional
"url": "https://..."
}
"""
url = entry["url"]
date_str = entry.get("release_date")
rel_date: Optional[date]
if date_str:
rel_date = datetime.strptime(date_str, "%Y-%m-%d").date()
else:
rel_date = None
species, id_type = _parse_orgdb_id(orgdb_id)
bioc_version = _parse_bioc_version(url)
return cls(
orgdb_id=orgdb_id,
release_date=rel_date,
url=url,
species=species,
id_type=id_type,
bioc_version=bioc_version,
)
def _parse_orgdb_id(orgdb_id: str):
"""Parse IDs like:
org.Hs.eg.db
org.At.tair.db
into (species, id_type).
"""
name = orgdb_id
if name.startswith("org."):
name = name[len("org.") :]
if name.endswith(".db"):
name = name[: -len(".db")]
if name.endswith(".sqlite"):
name = name[: -len(".sqlite")]
parts = name.split(".")
if len(parts) < 2:
return None, None
species = parts[0]
id_type = parts[1]
return species, id_type
def _parse_bioc_version(url: str) -> Optional[str]:
"""Extract the Bioconductor/AnnotationHub-like version from URL.
Example:
.../standard/3.19/org.Hs.eg.sqlite -> "3.19"
"""
parts = url.rstrip("/").split("/")
if len(parts) < 2:
return None
candidate = parts[-2]
if candidate.replace(".", "").isdigit():
return candidate
return None