Source code for dae.pheno.common

from __future__ import annotations

import enum
from pprint import pprint

from box import Box
from pydantic import BaseModel, ConfigDict


class RankRange(BaseModel):
    """Pair of optional rank bounds.

    Both bounds default to ``None``, i.e. a bound may be left unset.
    """

    # Lower bound of the range; None when not specified.
    min_rank: int | None = None
    # Upper bound of the range; None when not specified.
    max_rank: int | None = None
class InferenceConfig(BaseModel):
    """Classification inference configuration class.

    Unknown keys are rejected at validation time because the model is
    configured with ``extra="forbid"``.
    """

    model_config = ConfigDict(extra="forbid")

    # Scalar thresholds.
    # NOTE(review): exact semantics are defined by the code consuming this
    # config, which is not visible here - confirm before relying on them.
    min_individuals: int = 1
    non_numeric_cutoff: float = 0.06
    value_max_len: int = 32

    # Rank ranges, one per measure type.
    continuous: RankRange = RankRange(min_rank=10)
    ordinal: RankRange = RankRange(min_rank=1)
    categorical: RankRange = RankRange(min_rank=1, max_rank=15)

    # Overrides; both are off/unset by default.
    # presumably `measure_type` names a MeasureType member - verify with caller
    skip: bool = False
    measure_type: str | None = None
class ImportConfig(BaseModel):
    """Pheno import tool configuration.

    Unknown keys are rejected at validation time because the model is
    configured with ``extra="forbid"``.
    """

    model_config = ConfigDict(extra="forbid")

    # Behaviour switches.
    report_only: bool = False
    instruments_tab_separated: bool = False
    verbose: int = 0

    # Input description.
    # NOTE(review): `instruments_dir` and `pedigree` look like filesystem
    # paths with "" meaning "not provided" - confirm against the import tool.
    person_column: str = "personId"
    instruments_dir: str = ""
    pedigree: str = ""

    # Output description.
    db_filename: str = "pheno.db"
    output: str = "output"

    # Inference settings applied by default.
    default_inference: InferenceConfig = InferenceConfig()
class MeasureType(enum.Enum):
    """Definition of measure types.

    Only ``continuous`` and ``ordinal`` count as numeric; every other member
    is reported as text by :meth:`is_text`.
    """

    # pylint: disable=invalid-name
    continuous = 1
    ordinal = 2
    categorical = 3
    text = 4
    raw = 5
    other = 100
    skipped = 1000

    @staticmethod
    def from_str(measure_type: str) -> MeasureType:
        """Return the member whose name is exactly ``measure_type``.

        Raises:
            ValueError: if no member carries that name; the offending name
                is passed as the second exception argument.
        """
        member = MeasureType.__members__.get(measure_type)
        if member is None:
            raise ValueError("unexpected measure type", measure_type)
        return member

    @staticmethod
    def is_numeric(measure_type: MeasureType) -> bool:
        """Tell whether ``measure_type`` is ``continuous`` or ``ordinal``."""
        numeric_types = {MeasureType.continuous, MeasureType.ordinal}
        return measure_type in numeric_types

    @staticmethod
    def is_text(measure_type: MeasureType) -> bool:
        """Tell whether ``measure_type`` is anything but a numeric type."""
        if MeasureType.is_numeric(measure_type):
            return False
        return True
def default_config() -> Box:
    """Construct phenotype database preparation configuration.

    Returns:
        A ``Box`` wrapping a freshly built dictionary of default settings;
        a new object is created on every call.
    """
    role_settings = {
        "type": "column",
        "column": "role",
        "mapping": "INTERNAL",
    }
    person_settings = {"role": role_settings, "column": None}

    classification_settings = {
        "min_individuals": 1,
        "non_numeric_cutoff": 0.06,
        "value_max_len": 32,
        "continuous": {"min_rank": 10},
        "ordinal": {"min_rank": 1},
        "categorical": {"min_rank": 1, "max_rank": 15},
    }

    # Top-level keys are inserted in the same order as before so iteration
    # over the resulting Box is unchanged.
    defaults = {
        "report_only": False,
        "parallel": 4,
        "family": {"composite_key": False},
        "instruments": {"tab_separated": False, "dir": "."},
        "person": person_settings,
        "db": {"filename": "pheno.db"},
        "skip": {"measures": []},
        "classification": classification_settings,
        "output": "output",
    }
    return Box(defaults)
def check_phenotype_data_config(config: InferenceConfig) -> bool:
    """Check phenotype database preparation config for consistency.

    The following rules are verified, in order; the first violated rule is
    reported on standard output and ends the check:

    * categorical ``min_rank`` must be at least 1;
    * ordinal ``min_rank`` must not be below categorical ``min_rank``;
    * continuous ``min_rank`` must not be below ordinal ``min_rank``;
    * ``min_individuals`` must be at least 1.

    A rank that is ``None`` (unset) is skipped. Ranks are compared with
    ``is not None`` rather than by truthiness so that an explicit ``0`` is
    validated instead of being silently treated as "unset".

    Args:
        config: inference configuration to validate.

    Returns:
        ``True`` when every rule holds, ``False`` otherwise.
    """
    categorical = config.categorical.min_rank
    if categorical is not None and categorical < 1:
        print("categorical min rank expected to be > 0")
        return False

    ordinal = config.ordinal.min_rank
    if (
        ordinal is not None
        and categorical is not None
        and ordinal < categorical
    ):
        print("ordinal min rank expected to be >= categorical min rank")
        return False

    continuous = config.continuous.min_rank
    if (
        continuous is not None
        and ordinal is not None
        and continuous < ordinal
    ):
        print("continuous min rank expected to be >= ordinal min rank")
        return False

    individuals = config.min_individuals
    if individuals < 1:
        print("minimal number of individuals expected to be >= 1")
        return False

    return True
def dump_config(config: InferenceConfig) -> None:
    """Print phenotype database preparation configuration.

    The configuration is converted to a plain dictionary and pretty-printed
    to standard output between separator lines.

    Args:
        config: inference configuration to display.
    """
    separator = "--------------------------------------------------------"
    print(separator)
    print("CLASSIFICATION BOUNDARIES:")
    print(separator)
    # `BaseModel.dict()` is deprecated in pydantic v2 (which this module
    # already relies on via `ConfigDict`); `model_dump()` is its replacement.
    pprint(config.model_dump())  # noqa: T203
    print(separator)