from __future__ import annotations
import logging
import pathlib
from threading import Lock
from box import Box
from dae.configuration.gpf_config_parser import GPFConfigParser
from dae.configuration.schemas.phenotype_data import (
groups_file_schema,
pheno_conf_schema,
)
from dae.pheno.pheno_data import PhenotypeData, PhenotypeGroup, PhenotypeStudy
logger = logging.getLogger(__name__)
class PhenoRegistry:
    """Class to register phenotype data.

    Registered objects are kept in an in-memory dict keyed by their
    ``pheno_id``. Most accessors serialize on ``CACHE_LOCK``.
    """

    # Class attribute: a single lock shared by *all* PhenoRegistry instances.
    # threading.Lock is not reentrant, so a thread that already holds it must
    # not call any method below that acquires it again.
    CACHE_LOCK = Lock()

    def __init__(self) -> None:
        """Create an empty registry."""
        self._cache: dict[str, PhenotypeData] = {}

    def _register_study(self, study: PhenotypeData) -> None:
        """Store ``study`` under its ``pheno_id``; does no locking itself.

        Raises:
            ValueError: if an entry with the same ``pheno_id`` already exists.
        """
        if study.pheno_id in self._cache:
            raise ValueError(
                f"Pheno ID {study.pheno_id} already loaded.",
            )
        self._cache[study.pheno_id] = study

    def register_phenotype_data(
        self, phenotype_data: PhenotypeData, lock: bool = True,
    ) -> None:
        """Register a phenotype data study.

        Args:
            phenotype_data: Object to register; its ``pheno_id`` is the key.
            lock: When true (the default) ``CACHE_LOCK`` is held while the
                registry is modified. When false no lock is taken --
                presumably for callers that already hold ``CACHE_LOCK``
                themselves; confirm against the call sites.

        Raises:
            ValueError: if the ``pheno_id`` is already registered.
        """
        if not lock:
            self._register_study(phenotype_data)
            return
        with self.CACHE_LOCK:
            self._register_study(phenotype_data)

    @classmethod
    def load_pheno_data(cls, path: pathlib.Path) -> PhenotypeData:
        """Create a PhenotypeStudy object from a configuration file.

        Args:
            path: An existing file whose name ends in ``.yaml`` or ``.conf``.

        Returns:
            A ``PhenotypeStudy`` built from the ``name`` and ``dbfile``
            entries of the config's ``phenotype_data`` section.

        Raises:
            ValueError: if ``path`` is not a file or its name has neither
                of the accepted endings.
        """
        # str.endswith accepts a tuple, so both endings are checked at once.
        if not path.is_file() or not path.name.endswith((".yaml", ".conf")):
            raise ValueError("Invalid PhenotypeStudy path")

        config = GPFConfigParser.load_config(str(path), pheno_conf_schema)
        pheno_config = config["phenotype_data"]
        pheno_id = pheno_config["name"]
        logger.info("creating phenotype data <%s>", pheno_id)
        return PhenotypeStudy(
            pheno_id,
            pheno_config["dbfile"],
            config=pheno_config,
        )

    @classmethod
    def load_pheno_groups(
        cls, path: pathlib.Path,
        registry: PhenoRegistry,
    ) -> list[PhenotypeGroup]:
        """
        Load groups from groups file.

        Groups file should be a config file named 'groups.yaml' (the code
        also accepts 'groups.conf') in the base Pheno DB directory.

        Every child ID listed for a group is looked up in ``registry``, so
        the children must be registered before this is called. The lookup
        goes through ``registry.get_phenotype_data``, which acquires
        ``CACHE_LOCK``; do not call this while holding that lock.

        Raises:
            ValueError: if ``path`` is not an existing file named
                ``groups`` with a ``.yaml`` or ``.conf`` suffix.
            KeyError: if a child ID is not present in ``registry``.
        """
        is_groups_file = (
            path.is_file()
            and path.suffix in (".yaml", ".conf")
            and path.stem == "groups"
        )
        if not is_groups_file:
            raise ValueError("Invalid groups config file.")

        config = GPFConfigParser.load_config(str(path), groups_file_schema)
        return [
            PhenotypeGroup(
                group.pheno_id, [
                    registry.get_phenotype_data(child)
                    for child in group.children
                ],
            ) for group in config.pheno_groups
        ]

    def has_phenotype_data(self, data_id: str) -> bool:
        """Return whether ``data_id`` is registered (takes ``CACHE_LOCK``)."""
        with self.CACHE_LOCK:
            return data_id in self._cache

    def get_phenotype_data(self, data_id: str) -> PhenotypeData:
        """Return the object registered under ``data_id``.

        Takes ``CACHE_LOCK`` for the lookup.

        Raises:
            KeyError: if ``data_id`` is not registered.
        """
        with self.CACHE_LOCK:
            return self._cache[data_id]

    def get_phenotype_data_config(self, data_id: str) -> Box | None:
        """Return the ``config`` attribute of the object under ``data_id``.

        Takes ``CACHE_LOCK`` for the lookup.

        Raises:
            KeyError: if ``data_id`` is not registered.
        """
        with self.CACHE_LOCK:
            return self._cache[data_id].config

    def get_phenotype_data_ids(self) -> list[str]:
        """Return the registered IDs as a new list, in insertion order."""
        # NOTE(review): unlike the accessors above, this does not take
        # CACHE_LOCK -- confirm whether that is intentional.
        return list(self._cache)

    def get_all_phenotype_data(self) -> list[PhenotypeData]:
        """Return the registered objects as a new list, in insertion order."""
        # NOTE(review): read without CACHE_LOCK, same as
        # get_phenotype_data_ids -- confirm whether that is intentional.
        return list(self._cache.values())