Source code for dae.pheno.common

from __future__ import annotations

import enum

from pydantic import BaseModel, ConfigDict


class RankRange(BaseModel):
    """Pair of optional integer rank bounds.

    Both ``min_rank`` and ``max_rank`` default to ``None``.
    NOTE(review): presumably ``None`` means "no bound on this side" --
    confirm against the code that consumes this model.
    """

    # Unknown keys are rejected instead of being silently ignored.
    model_config = ConfigDict(extra="forbid")

    min_rank: int | None = None
    max_rank: int | None = None
class InferenceConfig(BaseModel):
    """Configuration for measure classification inference.

    Every field has a default, so an instance can be built with no
    arguments.  Unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Scalar thresholds and limits.
    # NOTE(review): exact semantics are defined by the inference code that
    # reads this config -- not visible here.
    min_individuals: int = 1
    non_numeric_cutoff: float = 0.06
    value_max_len: int = 32

    # Rank bounds, one RankRange per measure kind.
    continuous: RankRange = RankRange(min_rank=10)
    ordinal: RankRange = RankRange(min_rank=1)
    categorical: RankRange = RankRange(min_rank=1, max_rank=15)

    # Optional overrides; left unset (False / None) by default.
    skip: bool = False
    value_type: str | None = None
    histogram_type: str | None = None
class DataDictionaryConfig(BaseModel):
    """Pydantic model describing one data dictionary config entry.

    ``path`` is the only required field.  The remaining fields default to
    a tab delimiter and to the column names ``instrumentName``,
    ``measureName`` and ``description``.
    """

    # Unknown keys are rejected instead of being silently ignored.
    model_config = ConfigDict(extra="forbid")

    path: str
    instrument: str | None = None

    # Field separator; tab by default.
    delimiter: str = "\t"

    # Names of the columns to read from the data dictionary file.
    instrument_column: str = "instrumentName"
    measure_column: str = "measureName"
    description_column: str = "description"
class MeasureDescriptionsConfig(BaseModel):
    """Sources of measure descriptions.

    Holds an optional list of data dictionary file entries and an optional
    inline nested mapping.  Both default to ``None``.
    """

    # Unknown keys are rejected instead of being silently ignored.
    model_config = ConfigDict(extra="forbid")

    files: list[DataDictionaryConfig] | None = None

    # {Instrument -> {Measure -> Description}}
    dictionary: dict[str, dict[str, str]] | None = None
class RegressionMeasure(BaseModel):
    """Description of a single regression measure.

    All four fields are required; unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    instrument_name: str
    measure_names: list[str]
    jitter: float
    display_name: str
class StudyConfig(BaseModel):
    """Study-level configuration.

    ``regressions`` may be a string, a mapping of names to
    ``RegressionMeasure`` entries, or ``None`` (the default).
    NOTE(review): the string form is presumably a path to an external
    regressions file -- confirm against the loader.
    """

    # Unknown keys are rejected instead of being silently ignored.
    model_config = ConfigDict(extra="forbid")

    regressions: str | dict[str, RegressionMeasure] | None = None
class InstrumentConfig(BaseModel):
    """Configuration entry for a single instrument file.

    Only ``path`` is required; the other fields default to ``None``.
    NOTE(review): a ``None`` value presumably falls back to the matching
    import-level setting -- confirm against the import code.
    """

    # Unknown keys are rejected instead of being silently ignored.
    model_config = ConfigDict(extra="forbid")

    path: str
    instrument: str | None = None
    delimiter: str | None = None
    person_column: str | None = None
class PhenoImportConfig(BaseModel):
    """Top-level configuration for a pheno import.

    Required fields: ``id``, ``input_dir``, ``output_dir``,
    ``instrument_files``, ``pedigree`` and ``person_column``.  Everything
    else has a default.  Unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Required settings.
    id: str
    input_dir: str
    output_dir: str
    # Each entry is either a plain string or a full InstrumentConfig.
    instrument_files: list[str | InstrumentConfig]
    pedigree: str
    person_column: str

    # Settings with defaults.
    delimiter: str = ","
    skip_pedigree_measures: bool = False

    # Optional nested configuration sections.
    inference_config: str | dict[str, InferenceConfig] | None = None
    data_dictionary: MeasureDescriptionsConfig | None = None
    study_config: StudyConfig | None = None
class MeasureType(enum.Enum):
    """Enumeration of the supported measure types."""

    # pylint: disable=invalid-name
    continuous = 1
    ordinal = 2
    categorical = 3
    text = 4
    raw = 5
    other = 100
    skipped = 1000

    @staticmethod
    def from_str(measure_type: str) -> MeasureType:
        """Return the member whose name equals ``measure_type``.

        Raises:
            ValueError: if no member has that name.
        """
        member = MeasureType.__members__.get(measure_type)
        if member is None:
            raise ValueError("unexpected measure type", measure_type)
        return member

    @staticmethod
    def is_numeric(measure_type: MeasureType) -> bool:
        """Return True for ``continuous`` and ``ordinal`` measure types."""
        numeric_kinds = frozenset(
            (MeasureType.continuous, MeasureType.ordinal),
        )
        return measure_type in numeric_kinds

    @staticmethod
    def is_text(measure_type: MeasureType) -> bool:
        """Return True for every measure type that is not numeric."""
        numeric = MeasureType.is_numeric(measure_type)
        return not numeric