Source code for dae.tools.cnv_liftover

#!/usr/bin/env python

import argparse
import logging
import sys
import textwrap
from typing import cast

from dae.annotation.annotatable import CNVAllele
from dae.annotation.annotation_factory import load_pipeline_from_yaml
from dae.genomic_resources.reference_genome import (
    build_reference_genome_from_resource,
)
from dae.genomic_resources.repository_factory import (
    build_genomic_resource_repository,
)
from dae.gpf_instance.gpf_instance import GPFInstance
from dae.pedigrees.loader import FamiliesLoader
from dae.utils.statistics import StatsCollection
from dae.utils.verbosity_configuration import VerbosityConfiguration
from dae.variants.family_variant import FamilyAllele
from dae.variants_loaders.cnv.loader import CNVLoader

logger = logging.getLogger("denovo_liftover")


[docs] def parse_cli_arguments(argv: list[str]) -> argparse.Namespace: """Create CLI parser.""" parser = argparse.ArgumentParser(description="liftover CNV variants") VerbosityConfiguration.set_arguments(parser) FamiliesLoader.cli_arguments(parser) CNVLoader.cli_arguments(parser) parser.add_argument( "-c", "--chain", help="chain resource id", default="liftover/hg19ToHg38") parser.add_argument( "-t", "--target-genome", help="target genome", default="hg38/genomes/GRCh38-hg38") parser.add_argument( "-s", "--source-genome", help="source genome", default="hg19/genomes/GATK_ResourceBundle_5777_b37_phiX174") parser.add_argument( "--stats", help="filename to store liftover statistics", default="stats.txt", ) parser.add_argument( "-o", "--output", help="output filename", default="cnv_liftover.txt") return parser.parse_args(argv)
[docs] def main( argv: list[str] | None = None, gpf_instance: GPFInstance | None = None) -> None: """Liftover de Novo variants tool main function.""" # pylint: disable=too-many-locals if argv is None: argv = sys.argv[1:] if gpf_instance is None: gpf_instance = GPFInstance.build() args = parse_cli_arguments(argv) VerbosityConfiguration.set(args) grr = build_genomic_resource_repository() source_genome = build_reference_genome_from_resource( grr.get_resource(args.source_genome)) assert source_genome is not None source_genome.open() families_filenames, families_params = \ FamiliesLoader.parse_cli_arguments(args) families_filename = families_filenames[0] families_loader = FamiliesLoader( families_filename, **families_params, ) families = families_loader.load() variants_filenames, variants_params = \ CNVLoader.parse_cli_arguments(args) variants_loader = CNVLoader( families, variants_filenames, # type: ignore params=variants_params, genome=source_genome, ) pipeline_config = textwrap.dedent( f""" - liftover_annotator: chain: {args.chain} source_genome: {args.source_genome} target_genome: {args.target_genome} attributes: - source: liftover_annotatable name: target_annotatable """, ) pipeline = load_pipeline_from_yaml(pipeline_config, gpf_instance.grr) pipeline.open() stats: StatsCollection = StatsCollection() with open(args.output, "wt") as output: header = [ "location38", "cnv_type38", "location19", "cnv_type19", "size_change", "familyId", "personId", ] output.write("\t".join(header)) output.write("\n") for sv, fvs in variants_loader.full_variants_iterator(): assert len(sv.alt_alleles) == 1 aa = sv.alt_alleles[0] annotatable: CNVAllele = cast(CNVAllele, aa.get_annotatable()) result = pipeline.annotate(annotatable) liftover_annotatable: CNVAllele = \ cast(CNVAllele, result.get("target_annotatable")) if liftover_annotatable is None: logger.error("can't liftover %s", aa) stats.inc(("no_liftover", )) continue for fv in fvs: stats.inc(("liftover", )) stats.inc(( annotatable.type.name, liftover_annotatable.type.name)) size19 = len(annotatable) size38 = len(liftover_annotatable) size_diff = (100.0 * abs(size19 - size38)) / size19 if size_diff >= 50: logger.warning( "CNV variant changed more than 50 percent: %s; " "%s -> %s", size_diff, annotatable, liftover_annotatable) stats.inc((f"size_diff: {int(size_diff / 10) * 10}", )) for aa in fv.alt_alleles: fa = cast(FamilyAllele, aa) line: list[str] = [] person_ids = [] for person_id in fa.variant_in_members: if person_id is not None: person_ids.append(person_id) assert len(person_ids) >= 1 line = [ f"{liftover_annotatable.chrom}:" f"{liftover_annotatable.pos}-" f"{liftover_annotatable.pos_end}", liftover_annotatable.type.name, f"{annotatable.chrom}:" f"{annotatable.pos}-" f"{annotatable.pos_end}", annotatable.type.name, str(size_diff), fa.family_id, ";".join(person_ids), ] output.write("\t".join(line)) output.write("\n") stats.inc(("output", )) stats.save(args.stats)