# Source code for dae.variants.core

from __future__ import annotations

import logging
from enum import Enum

from dae.annotation.annotatable import Annotatable, CNVAllele, VCFAllele
from dae.utils.variant_utils import trim_parsimonious

logger = logging.getLogger(__name__)


class Allele:
    """Class representing alleles.

    An allele stores a chromosome, a start position, an optional end
    position, optional reference/alternative strings and an
    :class:`Allele.Type`. When no explicit type is passed to the
    constructor, the type is inferred from ``ref`` and ``alt``.
    """

    # Short display labels used by ``Allele.Type.__repr__``; member names
    # without an entry fall back to the member name itself.
    # NOTE(review): "large_insertion" matches no ``Allele.Type`` member
    # defined in this class (the member is ``large_duplication``), so this
    # entry is never used by ``__repr__`` -- confirm whether "cnv+" was
    # meant for ``large_duplication``.
    TYPE_DISPLAY_NAME = {
        "substitution": "sub",
        "small_insertion": "ins",
        "small_deletion": "del",
        "complex": "comp",
        "large_insertion": "cnv+",
        "large_deletion": "cnv-",
    }

    class Type(Enum):
        """Enumerator for allele type.

        Member values are bit masks: ``indel``, ``cnv``,
        ``tandem_repeat_ins`` and ``tandem_repeat_del`` are unions of the
        elementary members, so membership can be tested with ``&``.
        """

        # pylint: disable=invalid-name,unsupported-binary-operation
        position = 0
        substitution = 1
        small_insertion = 1 << 1
        small_deletion = 1 << 2
        complex = 1 << 3
        indel = small_insertion | small_deletion | complex
        large_deletion = 1 << 4
        large_duplication = 1 << 5
        cnv = large_deletion | large_duplication
        tandem_repeat = 1 << 6
        tandem_repeat_ins = tandem_repeat | small_insertion
        tandem_repeat_del = tandem_repeat | small_deletion

        def __and__(self, other: Allele.Type | None) -> int:
            """Return the bitwise AND of both values as ``int`` (0 for None)."""
            if other is None:
                return 0
            assert isinstance(other, Allele.Type), type(other)
            return self.value & other.value

        def __or__(self, other: Allele.Type | None) -> int:
            """Return the bitwise OR of both values as ``int`` (0 for None)."""
            if other is None:
                return 0
            assert isinstance(other, Allele.Type)
            return self.value | other.value

        def __ior__(self, other: Allele.Type | None) -> Allele.Type:
            """Return the member whose value is the union of both values.

            ``Allele.Type(...)`` raises ``ValueError`` when the combined
            value does not correspond to a declared member.
            """
            if other is None:
                # NOTE(review): returns a plain int although the annotation
                # promises ``Allele.Type``; kept as-is for compatibility.
                return 0
            assert isinstance(other, Allele.Type)
            return Allele.Type(self.value | other.value)

        def __repr__(self) -> str:
            """Return the short display label, or the member name."""
            return Allele.TYPE_DISPLAY_NAME.get(self.name) or self.name

        @classmethod
        def is_cnv(cls, vt: Allele.Type | None) -> bool:
            """Check whether ``vt`` shares a bit with ``cnv``.

            Anything that is not an ``Allele.Type`` (including ``None``)
            yields ``False``.
            """
            if not isinstance(vt, Allele.Type):
                return False
            return bool(vt & cls.cnv)

        @classmethod
        def is_tr(cls, vt: Allele.Type | None) -> bool:
            """Check whether ``vt`` has the ``tandem_repeat`` bit set.

            Anything that is not an ``Allele.Type`` (including ``None``)
            yields ``False``.
            """
            if not isinstance(vt, Allele.Type):
                return False
            return bool(vt & cls.tandem_repeat)

    def __init__(
        self,
        chrom: str,
        pos: int,
        pos_end: int | None = None,
        ref: str | None = None,
        alt: str | None = None,
        allele_type: Allele.Type | None = None,
    ):
        """Create an allele, inferring its type when none is given.

        Args:
            chrom: chromosome name.
            pos: start position.
            pos_end: optional end position.
            ref: optional reference string.
            alt: optional alternative string.
            allele_type: explicit type; when ``None`` the type is inferred
                from ``pos_end``, ``ref`` and ``alt``.

        Raises:
            ValueError: if the type is neither given as an ``Allele.Type``
                nor inferable from the arguments.
        """
        self._chrom: str = chrom
        self._pos: int = pos
        self._pos_end: int | None = pos_end
        self._ref: str | None = ref
        self._alt: str | None = alt
        self._allele_type: Allele.Type

        assert isinstance(self._chrom, str)
        assert isinstance(self._pos, int)
        assert self._pos_end is None or isinstance(self._pos_end, int)
        assert self._alt is None or isinstance(self._alt, str)
        assert self._ref is None or isinstance(self._ref, str)

        # Resolve into a local first: if nothing matches, the check below
        # must raise ValueError instead of tripping over a missing attribute.
        resolved: Allele.Type | None = allele_type
        if resolved is None:
            nothing_given = not pos_end and not ref and not alt
            if nothing_given or (ref and not alt):
                resolved = Allele.Type.position
                self._pos_end = pos
            elif ref and alt:
                same_first_base = ref[0] == alt[0]
                if len(ref) == 1 and len(alt) == 1:
                    resolved = Allele.Type.substitution
                elif len(ref) == 1 and len(alt) > 1 and same_first_base:
                    resolved = Allele.Type.small_insertion
                elif len(ref) > 1 and len(alt) == 1 and same_first_base:
                    resolved = Allele.Type.small_deletion
                else:
                    resolved = Allele.Type.complex

                if not self._pos_end:
                    # The end position covers the whole reference string.
                    self._pos_end = pos + len(ref) - 1

        if not isinstance(resolved, Allele.Type):
            raise ValueError(
                f"Can not determine the type of variant: "
                f"{self._chrom}:{self._pos} {self._ref}->{self._alt}")
        self._allele_type = resolved

    def get_annotatable(self) -> Annotatable:
        """Return an annotatable version of the allele.

        Large duplications and deletions become a ``CNVAllele``;
        substitutions and types sharing a bit with ``indel`` become a
        ``VCFAllele`` built from the output of ``trim_parsimonious``.

        Raises:
            ValueError: for any other allele type.
        """
        allele_type = self.allele_type

        if Allele.Type.large_duplication & allele_type:
            assert self.end_position is not None
            return CNVAllele(
                self.chrom, self.position, self.end_position,
                Annotatable.Type.LARGE_DUPLICATION)

        if Allele.Type.large_deletion & allele_type:
            assert self.end_position is not None
            return CNVAllele(
                self.chrom, self.position, self.end_position,
                Annotatable.Type.LARGE_DELETION)

        # ``substitution`` is not one of the ``indel`` bits, so it needs its
        # own equality test; both cases are handled identically.
        if allele_type == Allele.Type.substitution \
                or Allele.Type.indel & allele_type:
            assert self.reference is not None and \
                self.alternative is not None
            pos, ref, alt = trim_parsimonious(
                self.position, self.reference, self.alternative)
            return VCFAllele(self.chrom, pos, ref, alt)

        logger.error("unexpected allele: %s", self)
        raise ValueError(f"unexpeced allele: {self}")

    @property
    def chromosome(self) -> str:
        """Chromosome name (same value as ``chrom``)."""
        return self._chrom

    @property
    def chrom(self) -> str:
        """Chromosome name."""
        return self._chrom

    @property
    def position(self) -> int:
        """Start position of the allele."""
        return self._pos

    @property
    def end_position(self) -> int | None:
        """End position of the allele, if known."""
        return self._pos_end

    @property
    def reference(self) -> str | None:
        """Reference string, if any."""
        return self._ref

    @property
    def alternative(self) -> str | None:
        """Alternative string, if any."""
        return self._alt

    @property
    def allele_type(self) -> Allele.Type:
        """Type of the allele."""
        return self._allele_type

    @staticmethod
    def build_position_allele(chrom: str, pos: int) -> Allele:
        """Build an allele from a chromosome and a position only."""
        return Allele(chrom, pos)

    @staticmethod
    def build_vcf_allele(
            chrom: str, pos: int, ref: str, alt: str) -> Allele:
        """Build an allele whose type is inferred from ``ref`` and ``alt``."""
        return Allele(chrom, pos, ref=ref, alt=alt)

    @staticmethod
    def build_cnv_allele(
            chrom: str, pos: int, pos_end: int,
            allele_type: Allele.Type) -> Allele:
        """Build an allele spanning ``pos``..``pos_end`` of the given type."""
        return Allele(chrom, pos, pos_end, allele_type=allele_type)