Source code for gpf.gene_sets.gene_sets_db

"""Class for handling a database of gene set collections."""
from __future__ import annotations

import logging
from collections.abc import Sequence
from functools import cached_property
from typing import Any

from gain.gene_sets.gene_set import BaseGeneSetCollection, GeneSet

logger = logging.getLogger(__name__)


class GeneSetsDb:
    """Database of gene set collections, indexed by collection id.

    Every collection handed to the constructor is loaded eagerly through
    its ``load()`` method; the result is stored under the collection's
    ``collection_id``.
    """

    def __init__(
        self,
        gene_set_collections: Sequence[BaseGeneSetCollection],
    ) -> None:
        """Load the given collections and index them by collection id.

        If several collections share a ``collection_id``, the last one in
        *gene_set_collections* replaces the earlier ones.
        """
        self.gene_set_collections: dict[str, BaseGeneSetCollection] = {
            gsc.collection_id: gsc.load()
            for gsc in gene_set_collections
        }

    @cached_property
    def collections_descriptions(self) -> list[dict[str, Any]]:
        """Collect descriptions of the gene set collections.

        Builds one entry per collection with the keys ``"desc"`` (the
        collection's ``web_label``), ``"name"`` (its ``collection_id``) and
        ``"format"`` (its ``web_format_str`` split on ``"|"``).

        Collections whose ``web_label`` or ``web_format_str`` is empty or
        ``None`` are left out. The list is computed on first access and
        cached on the instance afterwards.
        """
        gene_sets_collections_desc: list[dict[str, Any]] = []
        for gsc in self.gene_set_collections.values():
            label = gsc.web_label
            format_str = gsc.web_format_str
            gsc_id = gsc.collection_id

            # Without both a label and a format string there is nothing
            # to describe, so the collection is skipped.
            if not label or not format_str:
                continue

            gene_sets_collections_desc.append(
                {
                    "desc": label,
                    "name": gsc_id,
                    "format": format_str.split("|"),
                },
            )
        return gene_sets_collections_desc

    def has_gene_set_collection(self, gsc_id: str) -> bool:
        """Check whether the database contains the given collection id."""
        return gsc_id in self.gene_set_collections

    def get_gene_set_collection_ids(self) -> set[str]:
        """Return the ids of all gene set collections in the database."""
        return set(self.gene_set_collections)

    def get_gene_set_ids(self, collection_id: str) -> set[str]:
        """Return the ids of all the gene sets in the specified collection.

        Raises:
            KeyError: if *collection_id* is not in the database.
        """
        gsc = self.gene_set_collections[collection_id]
        return set(gsc.gene_sets)

    def get_all_gene_sets(self, collection_id: str) -> list[GeneSet]:
        """Return all the gene sets in the specified collection.

        Raises:
            KeyError: if *collection_id* is not in the database.
        """
        gsc = self.gene_set_collections[collection_id]
        logger.debug(
            "gene sets from %s: %s", collection_id, len(gsc.gene_sets))
        return gsc.get_all_gene_sets()

    def get_gene_set(
        self,
        collection_id: str,
        gene_set_id: str,
    ) -> GeneSet | None:
        """Find and return a gene set in a gene set collection.

        The lookup of *gene_set_id* is delegated to the collection's own
        ``get_gene_set``; its result is returned unchanged.

        Raises:
            KeyError: if *collection_id* is not in the database.
        """
        gsc = self.gene_set_collections[collection_id]
        return gsc.get_gene_set(gene_set_id)

    def __len__(self) -> int:
        """Return the number of gene set collections in the database."""
        return len(self.gene_set_collections)