Source code for dae.query_variants.base_query_variants

import abc
import logging
from collections.abc import Generator
from typing import Any

import numpy as np

from dae.parquet.schema2.variant_serializers import VariantsDataSerializer
from dae.pedigrees.families_data import FamiliesData
from dae.query_variants.query_runners import QueryRunner
from dae.utils.regions import Region
from dae.variants.attributes import Inheritance
from dae.variants.family_variant import FamilyVariant
from dae.variants.variant import SummaryVariant, SummaryVariantFactory

# Type alias used by the `real_attr_filter` and `frequency_filter` query
# arguments: a list of `(str, (float | None, float | None))` pairs.
# NOTE(review): presumably (attribute name, (lower bound, upper bound)) with
# None standing for an open bound -- confirm against the query builders.
RealAttrFilterType = list[tuple[str, tuple[float | None, float | None]]]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class QueryVariantsBase(abc.ABC):
    """Abstract interface for querying summary and family variants.

    Subclasses provide the four abstract query methods. The base class
    keeps the study families and a variants serializer, and offers helpers
    that turn serialized blobs back into variant objects.
    """

    RUNNER_CLASS: type[QueryRunner]

    def __init__(self, families: FamiliesData) -> None:
        self.families = families
        self.serializer = VariantsDataSerializer.build_serializer()

    @abc.abstractmethod
    def build_summary_variants_query_runner(
        self, *,
        regions: list[Region] | None = None,
        genes: list[str] | None = None,
        effect_types: list[str] | None = None,
        variant_type: str | None = None,
        real_attr_filter: RealAttrFilterType | None = None,
        ultra_rare: bool | None = None,
        frequency_filter: RealAttrFilterType | None = None,
        return_reference: bool | None = None,
        return_unknown: bool | None = None,
        limit: int | None = None,
        **kwargs: Any,
    ) -> QueryRunner:
        """Build a query runner that searches summary variants.

        Every filter is keyword-only and optional.
        """

    @abc.abstractmethod
    def query_summary_variants(
        self, *,
        regions: list[Region] | None = None,
        genes: list[str] | None = None,
        effect_types: list[str] | None = None,
        variant_type: str | None = None,
        real_attr_filter: RealAttrFilterType | None = None,
        ultra_rare: bool | None = None,
        frequency_filter: RealAttrFilterType | None = None,
        return_reference: bool | None = None,
        return_unknown: bool | None = None,
        limit: int | None = None,
        **kwargs: Any,
    ) -> Generator[SummaryVariant, None, None]:
        """Run the summary variants query, yielding summary variants.

        Every filter is keyword-only and optional.
        """

    @abc.abstractmethod
    def build_family_variants_query_runner(
        self, *,
        regions: list[Region] | None = None,
        genes: list[str] | None = None,
        effect_types: list[str] | None = None,
        family_ids: list[str] | None = None,
        person_ids: list[str] | None = None,
        inheritance: list[str] | None = None,
        roles: str | None = None,
        sexes: str | None = None,
        variant_type: str | None = None,
        real_attr_filter: RealAttrFilterType | None = None,
        ultra_rare: bool | None = None,
        frequency_filter: RealAttrFilterType | None = None,
        return_reference: bool | None = None,
        return_unknown: bool | None = None,
        limit: int | None = None,
        study_filters: list[str] | None = None,
        pedigree_fields: tuple | None = None,
        **kwargs: Any,
    ) -> QueryRunner:
        # pylint: disable=too-many-arguments
        """Build a query runner that searches family variants.

        Every filter is keyword-only and optional.
        """

    @abc.abstractmethod
    def query_variants(
        self, *,
        regions: list[Region] | None = None,
        genes: list[str] | None = None,
        effect_types: list[str] | None = None,
        family_ids: list[str] | None = None,
        person_ids: list[str] | None = None,
        inheritance: list[str] | None = None,
        roles: str | None = None,
        sexes: str | None = None,
        variant_type: str | None = None,
        real_attr_filter: RealAttrFilterType | None = None,
        ultra_rare: bool | None = None,
        frequency_filter: RealAttrFilterType | None = None,
        return_reference: bool | None = None,
        return_unknown: bool | None = None,
        limit: int | None = None,
        pedigree_fields: tuple | None = None,
        **kwargs: Any,
    ) -> Generator[FamilyVariant, None, None]:
        # pylint: disable=too-many-arguments
        """Run the family variants query, yielding family variants.

        Every filter is keyword-only and optional.
        """

    def deserialize_summary_variant(
        self, sv_data: bytes,
    ) -> SummaryVariant:
        """Rebuild a summary variant from its serialized summary blob."""
        summary_record = self.serializer.deserialize_summary_record(sv_data)
        summary_variant = SummaryVariantFactory.summary_variant_from_records(
            summary_record,
        )
        return summary_variant

    def deserialize_family_variant(
        self, sv_data: bytes, fv_data: bytes,
    ) -> FamilyVariant:
        """Rebuild a family variant from a summary blob and a family blob.

        The family is looked up in ``self.families`` by the ``family_id``
        stored in the family record. When the family record carries
        ``family_variant_attributes``, they are applied pairwise to the
        variant's family alt alleles; a length mismatch raises
        ``ValueError`` because the pairing uses ``zip(..., strict=True)``.
        """
        summary_record = self.serializer.deserialize_summary_record(sv_data)
        family_record = self.serializer.deserialize_family_record(fv_data)

        # Keys are converted with int(); values are mapped to Inheritance.
        inheritance_in_members = {}
        raw_inheritance = family_record["inheritance_in_members"]
        for member_key, raw_values in raw_inheritance.items():
            member_index = int(member_key)
            inheritance_in_members[member_index] = [
                Inheritance.from_value(raw_value) for raw_value in raw_values
            ]

        allele_attributes = family_record.get("family_variant_attributes")

        summary_variant = SummaryVariantFactory.summary_variant_from_records(
            summary_record,
        )
        family = self.families[family_record["family_id"]]
        genotype = np.array(family_record["genotype"])
        best_state = np.array(family_record["best_state"])
        family_variant = FamilyVariant(
            summary_variant,
            family,
            genotype,
            best_state,
            inheritance_in_members=inheritance_in_members,
        )

        if not allele_attributes:
            return family_variant

        allele_pairs = zip(
            family_variant.family_alt_alleles,
            allele_attributes,
            strict=True,
        )
        for family_allele, attributes in allele_pairs:
            family_allele.update_attributes(attributes)
        return family_variant