Source code for dae.annotation.normalize_allele_annotator

"""Provides normalize allele annotator and helpers."""
import logging
from typing import Any

from dae.annotation.annotatable import Annotatable, VCFAllele
from dae.annotation.annotation_config import AnnotatorInfo, AttributeInfo
from dae.annotation.annotation_pipeline import (
    AnnotationPipeline,
    Annotator,
)
from dae.annotation.annotator_base import AnnotatorBase
from dae.genomic_resources.genomic_context import get_genomic_context
from dae.genomic_resources.reference_genome import (
    ReferenceGenome,
    build_reference_genome_from_resource,
)
from dae.genomic_resources.variant_utils import normalize_variant

logger = logging.getLogger(__name__)


[docs] def build_normalize_allele_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) -> Annotator: return NormalizeAlleleAnnotator(pipeline, info)
[docs] class NormalizeAlleleAnnotator(AnnotatorBase): """Annotator to normalize VCF alleles.""" def __init__(self, pipeline: AnnotationPipeline, info: AnnotatorInfo): genome_resrouce_id = info.parameters.get("genome") if genome_resrouce_id is None: genome = get_genomic_context().get_reference_genome() if genome is None: raise ValueError( f"The {info} has no reference genome " f"specified and a genome is missing in " f"the context.") else: resource = pipeline.repository.get_resource(genome_resrouce_id) genome = build_reference_genome_from_resource(resource) assert isinstance(genome, ReferenceGenome) info.resources += [genome.resource] if not info.attributes: info.attributes = [ AttributeInfo( "normalized_allele", "normalized_allele", True, # noqa FBT003 {})] super().__init__(pipeline, info, { "normalized_allele": ("annotatable", "Normalized allele."), }) self.genome = genome
[docs] def close(self) -> None: self.genome.close() super().close()
[docs] def open(self) -> Annotator: self.genome.open() return super().open()
def _do_annotate(self, annotatable: Annotatable, _: dict[str, Any]) \ -> dict[str, Any]: if not isinstance(annotatable, VCFAllele): return {"normalized_allele": annotatable} assert isinstance(annotatable, VCFAllele), annotatable normalized_allele = normalize_allele(annotatable, self.genome) return {"normalized_allele": normalized_allele}
[docs] def normalize_allele(allele: VCFAllele, genome: ReferenceGenome) -> VCFAllele: """Normalize an allele. Using algorithm defined in following https://genome.sph.umich.edu/wiki/Variant_Normalization """ chrom, pos, ref, alts = normalize_variant( allele.chrom, allele.pos, allele.ref, [allele.alt], genome) return VCFAllele(chrom, pos, ref, alts[0])