Source code for dae.genomic_resources.liftover_chain

"""Provides LiftOver chain resource."""

from __future__ import annotations

import logging
from typing import Any, cast

from pyliftover import LiftOver  # type: ignore

from dae.genomic_resources import GenomicResource
from dae.genomic_resources.repository import GenomicResourceRepo
from dae.genomic_resources.repository_factory import (
    build_genomic_resource_repository,
)
from dae.genomic_resources.resource_implementation import (
    ResourceConfigValidationMixin,
    get_base_resource_schema,
)

logger = logging.getLogger(__name__)


class LiftoverChain(ResourceConfigValidationMixin):
    """Defines Lift Over chain wrapper around pyliftover objects.

    The wrapper is built from a genomic resource of type ``liftover_chain``.
    The chain file itself is loaded lazily: call :meth:`open` before
    :meth:`convert_coordinate` and :meth:`close` when done.

    Attributes set by the constructor:

    * ``resource`` - the genomic resource the chain was built from;
    * ``chrom_variant_coordinates`` / ``chrom_target_coordinates`` -
      optional chromosome name mappings (``del_prefix`` / ``add_prefix``)
      read from the ``chrom_prefix`` section of the resource configuration;
    * ``liftover`` - the pyliftover object, ``None`` until :meth:`open`;
    * ``source_genome_id`` / ``target_genome_id`` - values of the
      ``source_genome`` / ``target_genome`` labels from the ``meta``
      section of the configuration, or ``None`` when absent.
    """

    def __init__(self, resource: GenomicResource):
        """Configure the chain from ``resource`` without loading the file.

        Raises:
            ValueError: if the resource type is not ``liftover_chain``.
        """
        self.resource = resource

        config = resource.get_config()
        if resource.get_type() != "liftover_chain":
            logger.error(
                "trying to use genomic resource %s "
                "as a liftover chain but its type is %s; %s",
                resource.resource_id, resource.get_type(), config)
            raise ValueError(f"wrong resource type: {config}")

        chrom_prefix = config.get("chrom_prefix")
        if chrom_prefix is None:
            self.chrom_variant_coordinates = None
            self.chrom_target_coordinates = None
        else:
            self.chrom_variant_coordinates = chrom_prefix.get(
                "variant_coordinates", None)
            self.chrom_target_coordinates = chrom_prefix.get(
                "target_coordinates", None)

        # The chain file is only parsed in open().
        self.liftover: LiftOver | None = None

        self.source_genome_id: str | None = None
        self.target_genome_id: str | None = None
        if config.get("meta") is not None \
                and config["meta"].get("labels") is not None:
            labels = config["meta"]["labels"]
            self.source_genome_id = labels.get("source_genome")
            self.target_genome_id = labels.get("target_genome")

    def close(self) -> None:
        """Drop the loaded pyliftover object; the chain becomes closed."""
        self.liftover = None

    def open(self) -> LiftoverChain:
        """Load the chain file named in the resource config; return self."""
        filename: str = self.resource.get_config()["filename"]
        with self.resource.open_raw_file(
                filename, "rb", compression=True) as chain_file:
            self.liftover = LiftOver(chain_file)
        return self

    def is_open(self) -> bool:
        """Return True when the chain file has been loaded."""
        return self.liftover is not None

    @property
    def files(self) -> set[str]:
        """Return the set of resource file names this chain uses."""
        return {self.resource.get_config()["filename"]}

    @staticmethod
    def map_chromosome(chrom: str, mapping: dict[str, str] | None) -> str:
        """Map a chromosome (contig) name according to configuration.

        Args:
            chrom: the chromosome name to map.
            mapping: optional dict with ``del_prefix`` and/or ``add_prefix``
                keys; when empty or ``None`` the name is returned unchanged.

        Returns:
            The name with ``del_prefix`` removed once from its start (only
            if the name starts with it) and then ``add_prefix`` prepended.
        """
        if not mapping:
            return chrom

        if "del_prefix" in mapping:
            del_prefix = mapping["del_prefix"]
            if chrom.startswith(del_prefix):
                # Slice off exactly the prefix. str.lstrip() would treat
                # del_prefix as a *set of characters* and could also eat
                # leading characters of the remaining contig name.
                chrom = chrom[len(del_prefix):]

        if "add_prefix" in mapping:
            add_prefix = mapping["add_prefix"]
            chrom = f"{add_prefix}{chrom}"

        return chrom

    def convert_coordinate(
        self, chrom: str, pos: int,
    ) -> tuple[str, int, str, int] | None:
        """Lift over a genomic coordinate.

        The chain must have been opened with :meth:`open` first.

        Args:
            chrom: chromosome name; it is mapped with the
                ``variant_coordinates`` prefix mapping before the lookup.
            pos: position; ``pos - 1`` is passed to pyliftover and 1 is
                added back to the result (pyliftover documents its
                coordinates as 0-based).

        Returns:
            ``None`` when pyliftover yields no result; otherwise the first
            result as a tuple whose first element is the chromosome name
            mapped with the ``target_coordinates`` prefix mapping, the
            second is the lifted position plus one and the third is the
            strand (``"+"`` or ``"-"``). The fourth element is passed
            through from pyliftover unchanged.
        """
        chrom = self.map_chromosome(chrom, self.chrom_variant_coordinates)

        assert self.liftover is not None
        lo_coordinates = self.liftover.convert_coordinate(chrom, pos - 1)

        if not lo_coordinates:
            return None
        if len(lo_coordinates) > 1:
            # Only the first candidate is used; the rest are just reported.
            logger.info(
                "liftover_variant: liftover returns more than one target "
                "position: %s", lo_coordinates)

        coordinates = list(lo_coordinates[0])
        coordinates[0] = self.map_chromosome(
            coordinates[0], self.chrom_target_coordinates)
        coordinates[1] += 1
        assert coordinates[2] in {"+", "-"}

        return cast(tuple[str, int, str, int], tuple(coordinates))

    @staticmethod
    def get_schema() -> dict[str, Any]:
        """Return the validation schema for a liftover chain resource config.

        Extends the base resource schema with ``filename`` and the optional
        ``chrom_prefix`` section.
        """
        return {
            **get_base_resource_schema(),
            "filename": {"type": "string"},
            "chrom_prefix": {"type": "dict", "schema": {
                "variant_coordinates": {"type": "dict", "schema": {
                    "del_prefix": {"type": "string"},
                    "add_prefix": {"type": "string"},
                }},
                "target_coordinates": {"type": "dict", "schema": {
                    "del_prefix": {"type": "string"},
                    "add_prefix": {"type": "string"},
                }},
            }},
        }


def build_liftover_chain_from_resource(
        resource: GenomicResource) -> LiftoverChain:
    """Load a Lift Over chain from GRR resource.

    Args:
        resource: genomic resource expected to be of type
            ``liftover_chain``.

    Returns:
        A ``LiftoverChain`` constructed from ``resource``.

    Raises:
        ValueError: if the resource type is not ``liftover_chain``.
    """
    config: dict = resource.get_config()

    if resource.get_type() != "liftover_chain":
        logger.error(
            "trying to use genomic resource %s "
            "as a liftover chain but its type is %s; %s",
            resource.resource_id, resource.get_type(), config)
        raise ValueError(f"wrong resource type: {config}")

    return LiftoverChain(resource)


def build_liftover_chain_from_resource_id(
    resource_id: str, grr: GenomicResourceRepo | None = None,
) -> LiftoverChain:
    """Load a Lift Over chain given the ID of a GRR resource.

    Args:
        resource_id: ID of the resource to look up in the repository.
        grr: repository to search; when ``None`` a repository is created
            with ``build_genomic_resource_repository()``.

    Returns:
        The chain built by ``build_liftover_chain_from_resource`` from the
        resource found under ``resource_id``.
    """
    repo = build_genomic_resource_repository() if grr is None else grr
    resource = repo.get_resource(resource_id)
    return build_liftover_chain_from_resource(resource)