Source code for dae.annotation.annotatable

from __future__ import annotations

import enum


class Annotatable:
    """Base class for annotatables used in annotation pipeline.

    An annotatable is described by a chromosome name, a start position, an
    end position and an :class:`Annotatable.Type`.  ``len()`` treats the
    ``[pos, pos_end]`` interval as inclusive on both ends.
    """

    class Type(enum.Enum):
        """Defines annotatable types."""

        POSITION = 0
        REGION = 1
        SUBSTITUTION = 2
        SMALL_INSERTION = 3
        SMALL_DELETION = 4
        COMPLEX = 5
        LARGE_DUPLICATION = 6
        LARGE_DELETION = 7

        @staticmethod
        def from_string(variant: str) -> Annotatable.Type:
            """Construct annotatable type from string argument.

            The lookup is case-insensitive and matches against the member
            names (``"position"``, ``"small_deletion"``, ...).

            Raises:
                ValueError: if ``variant`` does not name a member.
            """
            vtype = variant.lower()
            # Compare lower-cased names so that exactly the same spellings
            # are accepted as with an explicit per-member comparison.
            for member in Annotatable.Type:
                if member.name.lower() == vtype:
                    return member
            raise ValueError(f"unexpected annotatable type: {variant}")

    def __init__(
        self, chrom: str, pos: int, pos_end: int,
        annotatable_type: Annotatable.Type,
    ):
        self._chrom = chrom
        self._pos = pos
        self._pos_end = pos_end
        self.type = annotatable_type

    @property
    def chrom(self) -> str:
        """Return the chromosome name."""
        return self._chrom

    @property
    def chromosome(self) -> str:
        """Return the chromosome name (alias of ``chrom``)."""
        return self._chrom

    @property
    def pos(self) -> int:
        """Return the start position."""
        return self._pos

    @property
    def position(self) -> int:
        """Return the start position (alias of ``pos``)."""
        return self._pos

    @property
    def end_position(self) -> int:
        """Return the end position (alias of ``pos_end``)."""
        return self._pos_end

    @property
    def pos_end(self) -> int:
        """Return the end position."""
        return self._pos_end

    def __len__(self) -> int:
        """Return the number of positions in the closed interval."""
        return self._pos_end - self._pos + 1

    def __repr__(self) -> str:
        """Return the string form; must be provided by subclasses."""
        raise NotImplementedError

    def __eq__(self, other: object) -> bool:
        """Compare type, chromosome, start and end of two annotatables."""
        if not isinstance(other, Annotatable):
            return False
        return self.type == other.type and self.chrom == other.chrom and \
            self.pos == other.pos and self.pos_end == other.pos_end

    def __hash__(self) -> int:
        """Hash over the same fields that ``__eq__`` compares.

        Defining ``__eq__`` alone makes a class unhashable; this keeps
        annotatables usable in sets and as dictionary keys.  The hash
        depends on ``self.type``, which is a public attribute, so it should
        not be reassigned while the object is stored in a hashed container.
        """
        return hash((self.type, self.chrom, self.pos, self.pos_end))

    @staticmethod
    def tokenize(value: str) -> tuple[str, list[str]]:
        """Split ``TYPE(arg1, arg2, ...)`` into a type name and arguments.

        Spaces inside the argument list are removed and trailing ``)``
        characters are stripped before splitting on commas.

        Raises:
            ValueError: if ``value`` does not contain exactly one ``(``.
        """
        # value := TYPE(arg1, arg2, ...)
        tokens = value.split("(")
        if len(tokens) != 2:
            raise ValueError(
                f"Attempted to tokenize invalid input - {value}")
        return tokens[0], tokens[1].rstrip(")").replace(" ", "").split(",")

    @staticmethod
    def from_string(value: str) -> Annotatable:
        """Deserialize an Annotatable instance from a string value.

        The leading type token selects which subclass parses the value.

        Raises:
            ValueError: if the value cannot be tokenized or the type token
                is not recognised.
        """
        a_type, _ = Annotatable.tokenize(value)
        if a_type in ("Position", "POSITION"):
            return Position.from_string(value)
        if a_type in ("Region", "REGION"):
            return Region.from_string(value)
        if a_type in ("VCFAllele", "SUBSTITUTION", "COMPLEX",
                      "SMALL_DELETION", "SMALL_INSERTION"):
            return VCFAllele.from_string(value)
        if a_type in ("CNVAllele", "LARGE_DUPLICATION", "LARGE_DELETION"):
            return CNVAllele.from_string(value)

        raise ValueError(
            f"No matching Annotatable type found for: {value}")
class Position(Annotatable):
    """Annotatable class representing a single position in a chromosome."""

    def __init__(self, chrom: str, pos: int):
        # A position starts and ends on the same coordinate.
        super().__init__(
            chrom, pos, pos, Annotatable.Type.POSITION)

    def __repr__(self) -> str:
        return f"Position({self.chrom},{self.pos})"

    @staticmethod
    def from_string(value: str) -> Position:
        """Parse ``Position(chrom,pos)`` or ``POSITION(chrom,pos)``.

        Raises:
            ValueError: if the type token is not a position token, the
                argument count is not 2, or ``pos`` is not an integer.
        """
        a_type, args = Annotatable.tokenize(value)
        if a_type not in ("Position", "POSITION"):
            raise ValueError(
                f"unexpected annotatable type for Position: {value}")
        if len(args) != 2:
            raise ValueError(
                f"Position expects 2 arguments (chrom, pos): {value}")
        return Position(args[0], int(args[1]))
class Region(Annotatable):
    """Annotatable class representing a region in a chromosome."""

    def __init__(self, chrom: str, pos_begin: int, pos_end: int):
        super().__init__(
            chrom, pos_begin, pos_end, Annotatable.Type.REGION)

    def __repr__(self) -> str:
        return f"Region({self.chrom},{self.pos},{self.pos_end})"

    @staticmethod
    def from_string(value: str) -> Region:
        """Parse ``Region(chrom,begin,end)`` or ``REGION(chrom,begin,end)``.

        Raises:
            ValueError: if the type token is not a region token, the
                argument count is not 3, or a coordinate is not an integer.
        """
        a_type, args = Annotatable.tokenize(value)
        if a_type not in ("Region", "REGION"):
            raise ValueError(
                f"unexpected annotatable type for Region: {value}")
        if len(args) != 3:
            raise ValueError(
                f"Region expects 3 arguments "
                f"(chrom, pos_begin, pos_end): {value}")
        return Region(args[0], int(args[1]), int(args[2]))
class VCFAllele(Annotatable):
    """Defines small variants annotatable.

    The annotatable type and end position are derived from the lengths of
    the reference and alternative alleles:

    * both of length 1: ``SUBSTITUTION``, ``pos_end = pos``;
    * ``ref`` of length 1, longer ``alt`` sharing the first base:
      ``SMALL_INSERTION``, ``pos_end = pos + 1``;
    * ``alt`` of length 1, longer ``ref`` sharing the first base:
      ``SMALL_DELETION``, ``pos_end = pos + len(ref)``;
    * anything else: ``COMPLEX``, ``pos_end = pos + len(ref)``.
    """

    def __init__(self, chrom: str, pos: int, ref: str, alt: str):
        assert ref is not None
        assert alt is not None

        self._ref = ref
        self._alt = alt

        ref_len = len(ref)
        alt_len = len(alt)
        if ref_len == 1 and alt_len == 1:
            allele_type = Annotatable.Type.SUBSTITUTION
            pos_end = pos
        elif ref_len == 1 and alt_len > 1 and ref[0] == alt[0]:
            allele_type = Annotatable.Type.SMALL_INSERTION
            pos_end = pos + 1
        elif ref_len > 1 and alt_len == 1 and ref[0] == alt[0]:
            allele_type = Annotatable.Type.SMALL_DELETION
            pos_end = pos + ref_len
        else:
            allele_type = Annotatable.Type.COMPLEX
            pos_end = pos + ref_len

        super().__init__(chrom, pos, pos_end, allele_type)

    @property
    def ref(self) -> str:
        """Return the reference allele."""
        return self._ref

    @property
    def reference(self) -> str:
        """Return the reference allele (alias of ``ref``)."""
        return self._ref

    @property
    def alt(self) -> str:
        """Return the alternative allele."""
        return self._alt

    @property
    def alternative(self) -> str:
        """Return the alternative allele (alias of ``alt``)."""
        return self._alt

    def __repr__(self) -> str:
        return (
            f"VCFAllele({self.chrom},{self.pos}"
            f",{self.ref},{self.alt})"
        )

    def __eq__(self, other: object) -> bool:
        """Equal when the base fields match and ``ref``/``alt`` agree."""
        if not super().__eq__(other):
            return False
        if not isinstance(other, VCFAllele):
            return False
        return self.ref == other.ref and self.alt == other.alt

    def __hash__(self) -> int:
        """Hash over every field that ``__eq__`` compares.

        Overriding ``__eq__`` without ``__hash__`` makes a class unhashable,
        so the hash is defined explicitly here.
        """
        return hash((
            self.type, self.chrom, self.pos, self.pos_end,
            self.ref, self.alt,
        ))

    @staticmethod
    def from_string(value: str) -> VCFAllele:
        """Parse ``VCFAllele(chrom,pos,ref,alt)``.

        The type token may also be one of ``SUBSTITUTION``, ``COMPLEX``,
        ``SMALL_DELETION`` or ``SMALL_INSERTION``; the resulting type is
        always recomputed from ``ref`` and ``alt`` by the constructor.

        Raises:
            ValueError: if the type token is not accepted, the argument
                count is not 4, or ``pos`` is not an integer.
        """
        a_type, args = Annotatable.tokenize(value)
        if a_type not in ("VCFAllele", "SUBSTITUTION", "COMPLEX",
                          "SMALL_DELETION", "SMALL_INSERTION"):
            raise ValueError(
                f"unexpected annotatable type for VCFAllele: {value}")
        if len(args) != 4:
            raise ValueError(
                f"VCFAllele expects 4 arguments "
                f"(chrom, pos, ref, alt): {value}")
        return VCFAllele(args[0], int(args[1]), args[2], args[3])
class CNVAllele(Annotatable):
    """Defines copy number variants annotatable."""

    def __init__(
        self, chrom: str, pos_begin: int, pos_end: int,
        cnv_type: Annotatable.Type,
    ):
        assert cnv_type in {
            Annotatable.Type.LARGE_DELETION,
            Annotatable.Type.LARGE_DUPLICATION}, cnv_type
        super().__init__(chrom, pos_begin, pos_end, cnv_type)

    def __repr__(self) -> str:
        return (
            f"CNVAllele({self.chrom},{self.pos},{self.pos_end}"
            f",{self.type})"
        )

    @staticmethod
    def from_string(value: str) -> CNVAllele:
        """Parse a CNV annotatable from its string form.

        Accepted forms are ``CNVAllele(chrom,begin,end,type)`` and
        ``LARGE_DUPLICATION(chrom,begin,end)`` /
        ``LARGE_DELETION(chrom,begin,end)``.  In the first form ``type`` may
        carry an enum qualifier (e.g. ``Type.LARGE_DELETION``), which is
        what ``__repr__`` emits.

        Raises:
            ValueError: if the type token is not accepted, the argument
                count does not match the form, the type name is not a CNV
                type, or a coordinate is not an integer.
        """
        a_type, args = Annotatable.tokenize(value)
        if a_type == "CNVAllele":
            if len(args) != 4:
                raise ValueError(
                    f"CNVAllele expects 4 arguments "
                    f"(chrom, pos_begin, pos_end, type): {value}")
            # __repr__ formats the type through the enum's str(), which
            # yields "Type.<NAME>"; keep only the part after the last dot
            # so that repr() output can be parsed back.
            type_name = args[3].rpartition(".")[2]
        elif a_type in ("LARGE_DUPLICATION", "LARGE_DELETION"):
            if len(args) != 3:
                raise ValueError(
                    f"{a_type} expects 3 arguments "
                    f"(chrom, pos_begin, pos_end): {value}")
            type_name = a_type
        else:
            raise ValueError(
                f"unexpected annotatable type for CNVAllele: {value}")

        cnv_type = Annotatable.Type.from_string(type_name)
        # Reject non-CNV types here: the constructor only asserts, and
        # assertions are skipped when Python runs with -O.
        if cnv_type not in (Annotatable.Type.LARGE_DELETION,
                            Annotatable.Type.LARGE_DUPLICATION):
            raise ValueError(
                f"unexpected CNV type {type_name!r} in: {value}")
        return CNVAllele(args[0], int(args[1]), int(args[2]), cnv_type)