from collections.abc import Generator, Sequence
from typing import Any, Callable, Dict, List, TextIO
from dae.genomic_resources.reference_genome import ReferenceGenome
from dae.utils.dae_utils import dae2vcf_variant
from dae.variants.variant import allele_type_from_cshl_variant
[docs]
def variant_to_variant_type() -> Callable[[Dict[str, Any]], Dict[str, Any]]:
    """Build a record transformer that annotates records with a variant type.

    The returned callable reads the ``"variant"`` entry of a record, passes
    it to ``allele_type_from_cshl_variant`` and stores the outcome under the
    ``"variant_type"`` key. The record is updated in place and the same
    object is returned.
    """

    def _annotate_variant_type(record: Dict[str, Any]) -> Dict[str, Any]:
        # A missing "variant" key surfaces as a KeyError from this lookup.
        record["variant_type"] = allele_type_from_cshl_variant(
            record["variant"],
        )
        return record

    return _annotate_variant_type
[docs]
def adjust_chrom_prefix(
add_chrom_prefix: str | None = None,
del_chrom_prefix: str | None = None,
) -> Callable[[Dict[str, Any]], Dict[str, Any]]:
"""Return a function that adds/removes a prefix to/from chrom names."""
if add_chrom_prefix is not None:
def _add_chrom_prefix(record: Dict[str, Any]) -> Dict[str, Any]:
chrom = record["chrom"]
if add_chrom_prefix not in chrom:
record["chrom"] = f"{add_chrom_prefix}{chrom}"
return record
return _add_chrom_prefix
if del_chrom_prefix is not None:
def _del_chrom_prefix(record: Dict[str, Any]) -> Dict[str, Any]:
chrom = record["chrom"]
if del_chrom_prefix in chrom:
record["chrom"] = chrom[len(del_chrom_prefix):]
return record
return _del_chrom_prefix
def _identity(record: Dict[str, Any]) -> Dict[str, Any]:
return record
return _identity
[docs]
def flexible_variant_loader(
    infile: TextIO,
    in_header: List[str],
    line_splitter: Callable,
    transformers: Sequence[Callable[[Dict[str, Any]], Dict[str, Any]]],
    filters: Sequence[Callable[[Dict[str, Any]], bool]],
) -> Generator[Dict[str, Any], None, None]:
    """Split, transform and filter each line from infile.

    Every line is split with ``line_splitter`` and paired with ``in_header``
    to form a record. The record is then passed through ``transformers`` in
    order, and yielded only if every callable in ``filters`` accepts it.

    Args:
        infile: iterable of text lines to process.
        in_header: column names, one per value produced by the splitter.
        line_splitter: callable turning a line into a sequence of values.
        transformers: callables applied in order; each receives the record
            and returns the record to pass on.
        filters: predicates evaluated on the transformed record; evaluation
            stops at the first one that rejects it.

    Yields:
        The transformed records accepted by all filters.

    Raises:
        AssertionError: when a line splits into a different number of
            values than there are header columns. Because this is a
            generator, the error is raised while iterating, not at call
            time.
    """
    for line in infile:
        parts = line_splitter(line)
        # Raised explicitly instead of using an ``assert`` statement so the
        # check survives ``python -O``; otherwise ``zip`` below would
        # silently truncate a malformed row. The exception type and payload
        # match what the assert statement produced.
        if len(in_header) != len(parts):
            raise AssertionError((in_header, parts))

        result: Dict[str, Any] = dict(zip(in_header, parts))
        for transformer in transformers:
            result = transformer(result)

        if all(accepts(result) for accepts in filters):
            yield result