import pandas as pd
from owlready2 import ThingClass, get_ontology

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"

# originally published to gist
# https://gist.github.com/jkanche/1f010c38a090cefd8f2f5e21c20fc1b8
def owl_to_dataframe(owl_location: str) -> pd.DataFrame:
    """Extract nodes and their lineages from ontologies as :py:class:`~pandas.DataFrame`.

    Example:

        .. code-block:: python

            from biorat.ontology import (
                owl_to_dataframe,
            )

            result_df = owl_to_dataframe(
                "https://github.com/obophenotype/cell-ontology/releases/download/v2024-09-26/cl.owl"
            )
            print(result_df)

    Args:
        owl_location:
            Location or the URL of the OWL file. Supports any
            argument accepted by :py:func:`~owlready2.get_ontology`.

    Returns:
        A Pandas DataFrame with one row per class of the ontology and the
        columns ``iri``, ``term_id``, ``label``, ``lineage_ids`` and
        ``lineage_labels``. The two lineage columns are ``" > "``-joined
        strings listing the class's ancestors, most distant ancestor first;
        ``lineage_ids`` holds ancestor names and ``lineage_labels`` holds
        ancestor labels, position by position.
    """
    onto = get_ontology(owl_location).load()

    def get_lineage(cls):
        """Collect ``(label, term_id)`` pairs for every ancestor of ``cls``.

        Parents are visited depth-first through ``is_a``, so each parent is
        immediately followed by its own ancestors. There is no visited-set:
        an ancestor reachable through several paths is listed once per path.
        NOTE(review): a cyclic ``is_a`` chain would recurse without bound --
        confirm inputs are acyclic.
        """
        lineage = []
        for parent in cls.is_a:
            # Entries of ``is_a`` that are not ThingClass instances are skipped.
            if isinstance(parent, ThingClass):
                # Fall back to the class name when no label is defined.
                lineage.append((parent.label.first() or parent.name, parent.name))
                lineage.extend(get_lineage(parent))
        return lineage

    columns = ["iri", "term_id", "label", "lineage_ids", "lineage_labels"]

    recs = []
    for cls in onto.classes():
        # get_lineage returns nearest ancestors first; reverse so the lineage
        # reads from the most distant ancestor down to the direct parent.
        ancestors = list(reversed(get_lineage(cls)))

        recs.append(
            {
                "iri": cls.iri,
                "term_id": cls.name,
                # Use the first label if available, otherwise the class name.
                "label": cls.label.first() or cls.name,
                # Each ancestor tuple is (label, term_id): ids come from the
                # second element and labels from the first.
                "lineage_ids": " > ".join(term_id for _, term_id in ancestors),
                "lineage_labels": " > ".join(label for label, _ in ancestors),
            }
        )

    # Pinning the columns keeps the schema stable even when the ontology
    # defines no classes; for non-empty input the result is unchanged.
    return pd.DataFrame(recs, columns=columns)