import logging
import operator
from functools import cached_property, lru_cache
from typing import Any, cast
from dae.gene_sets.denovo_gene_set_collection import DenovoGeneSetCollection
from dae.gene_sets.denovo_gene_set_helpers import (
DenovoGeneSetHelpers,
)
logger = logging.getLogger(__name__)
[docs]
class DenovoGeneSetsDb:
"""Class to manage available de Novo gene sets."""
def __init__(self, gpf_instance: Any):
self.gpf_instance = gpf_instance
self._gene_set_collections_cache: dict[
str, DenovoGeneSetCollection] = {}
self._gene_set_configs_cache: dict[str, Any] = {}
def __len__(self) -> int:
return len(self._denovo_gene_set_collections)
[docs]
def has_gene_sets(self) -> bool:
return len(self._denovo_gene_set_collections) > 0
[docs]
def reload(self) -> None:
self._gene_set_collections_cache = {}
self._gene_set_configs_cache = {}
@property
def _denovo_gene_set_collections(
self) -> dict[str, DenovoGeneSetCollection]:
if not self._gene_set_collections_cache:
self._load_cache()
return self._gene_set_collections_cache
@property
def _denovo_gene_set_configs(self) -> dict[str, Any]:
if not self._gene_set_configs_cache:
self._load_cache()
return self._gene_set_configs_cache
def _load_cache(self) -> None:
for study_id in self.get_genotype_data_ids():
study = self.gpf_instance.get_genotype_data(study_id)
assert study is not None, study_id
dgsc = DenovoGeneSetHelpers.load_collection(study)
if dgsc is None:
logger.info(
"No denovo gene set collection for %s", study_id)
continue
self._gene_set_configs_cache[study_id] = dgsc.config
self._gene_set_collections_cache[study_id] = dgsc
[docs]
def build_cache(self, genotype_data_ids: list[str]) -> None:
for study_id in genotype_data_ids:
study = self.gpf_instance.get_genotype_data(study_id)
assert study is not None, study_id
DenovoGeneSetHelpers.build_collection(study)
@cached_property
def collections_descriptions(self) -> list[dict[str, Any]]:
"""Return gene set descriptions."""
return [{
"desc": "Denovo",
"name": "denovo",
"format": ["key", " (|count|)"],
}]
@cached_property
def denovo_gene_sets_types(self) -> list[dict[str, Any]]:
"""Return denovo gene sets types descriptions."""
return sorted([
gs_collection.get_gene_sets_types_legend()
for gs_collection in self._denovo_gene_set_collections.values()
], key=operator.itemgetter("datasetId"))
[docs]
def get_collection_types_legend(
self, gs_collection_id: str,
) -> dict[str, Any]:
return self._denovo_gene_set_collections[gs_collection_id]\
.get_gene_sets_types_legend()
[docs]
@lru_cache(maxsize=64)
def get_genotype_data_ids(self) -> set[str]:
"""Return list of genotype data IDs with denovo gene sets."""
study_ids = set(
self.gpf_instance.get_genotype_data_ids())
result = set()
for study_id in study_ids:
config = self.gpf_instance.get_genotype_data_config(study_id)
if config is None:
logger.error(
"unable to load genotype data %s", study_id)
raise ValueError(
f"unable to load genotype data {study_id}")
if config.denovo_gene_sets and \
config.denovo_gene_sets.enabled and \
config.denovo_gene_sets.selected_person_set_collections:
result.add(study_id)
return result
[docs]
def get_gene_set_ids(self, genotype_data_id: str) -> list[str]:
return cast(
list[str],
self._gene_set_configs_cache[
genotype_data_id].gene_sets_names)
[docs]
def get_gene_set(
self,
gene_set_id: str,
gene_set_spec: dict[str, dict[str, list[str]]],
collection_id: str = "denovo", # noqa: ARG002
) -> dict[str, Any] | None:
# pylint: disable=unused-argument
"""Return de Novo gene set matching the spec for permitted datasets."""
return DenovoGeneSetCollection.get_gene_set_from_collections(
gene_set_id,
list(self._denovo_gene_set_collections.values()),
gene_set_spec,
)
[docs]
def get_all_gene_sets(
self,
denovo_gene_set_spec: dict[str, dict[str, list[str]]],
collection_id: str = "denovo", # noqa: ARG002
) -> list[dict[str, Any]]:
# pylint: disable=unused-argument
"""Return all de Novo gene sets matching the spec for permitted DS."""
return DenovoGeneSetCollection.get_all_gene_sets(
list(self._denovo_gene_set_collections.values()),
denovo_gene_set_spec,
)