Source code for dae.schema2_storage.schema2_layout

import logging
from dataclasses import dataclass

from dae.utils import fs_utils

logger = logging.getLogger(__name__)


[docs] @dataclass(frozen=True) class Schema2DatasetLayout: """Schema2 dataset layout data class.""" study: str pedigree: str summary: str | None family: str | None meta: str base_dir: str | None = None
[docs] def has_variants(self) -> bool: return self.summary is not None and self.family is not None
def load_schema2_dataset_layout(
    study_dir: str, *,
    has_variants: bool = True,
) -> Schema2DatasetLayout:
    """Build the layout of an already existing schema2 dataset directory.

    The ``summary`` and ``family`` entries of the returned layout are set
    only when ``has_variants`` is true and the corresponding path exists
    under ``study_dir``; otherwise they are ``None``.  The pedigree and
    meta locations are always filled in and are not checked for existence.

    Args:
        study_dir: root directory of the dataset.
        has_variants: when false, the summary and family entries are forced
            to ``None`` and storage is not probed for them.

    Returns:
        The layout describing the dataset rooted at ``study_dir``.
    """
    summary: str | None = None
    family: str | None = None

    if has_variants:
        # Probe storage only when the answer is actually used; with
        # has_variants=False both entries are None regardless of what
        # is on disk, so the existence checks would be wasted I/O.
        summary_path = fs_utils.join(study_dir, "summary")
        if fs_utils.exists(summary_path):
            summary = summary_path

        family_path = fs_utils.join(study_dir, "family")
        if fs_utils.exists(family_path):
            family = family_path

    return Schema2DatasetLayout(
        study=study_dir,
        pedigree=fs_utils.join(study_dir, "pedigree", "pedigree.parquet"),
        summary=summary,
        family=family,
        meta=fs_utils.join(study_dir, "meta", "meta.parquet"),
    )
def create_schema2_dataset_layout(study_dir: str) -> Schema2DatasetLayout:
    """Build the layout for a new schema2 dataset rooted at ``study_dir``.

    Intended for datasets that are about to be created: the summary and
    family locations are always filled in and nothing is checked for
    existence.
    """
    pedigree_path = fs_utils.join(study_dir, "pedigree", "pedigree.parquet")
    meta_path = fs_utils.join(study_dir, "meta", "meta.parquet")

    return Schema2DatasetLayout(
        study=study_dir,
        pedigree=pedigree_path,
        summary=fs_utils.join(study_dir, "summary"),
        family=fs_utils.join(study_dir, "family"),
        meta=meta_path,
    )