Source code for gain.utils.variant_utils

"""Pure string utilities for variant manipulation."""

# Base -> complementary base lookup. The two strings are aligned position by
# position (A<->T, C<->G); "N" maps to itself.
DNA_COMPLEMENT_NUCLEOTIDES = dict(zip("ACGTN", "TGCAN"))


def complement(nucleotides: str) -> str:
    """Return the base-by-base complement of a nucleotide string.

    Every character is upper-cased and looked up in
    ``DNA_COMPLEMENT_NUCLEOTIDES``; a character with no entry in that table
    is replaced by ``"N"``. The order of the bases is not changed.
    """
    return "".join(
        DNA_COMPLEMENT_NUCLEOTIDES.get(base.upper(), "N")
        for base in nucleotides
    )
def reverse_complement(nucleotides: str) -> str:
    """Return the complement of *nucleotides* read in reverse order."""
    reversed_sequence = nucleotides[::-1]
    return complement(reversed_sequence)
def trim_str_left(pos: int, ref: str, alt: str) -> tuple[int, str, str]:
    """Strip the prefix shared by *ref* and *alt* and shift *pos* past it.

    Returns ``(pos + k, ref[k:], alt[k:])`` where ``k`` is the number of
    leading characters on which the two strings agree. Either returned
    string may be empty when one input is a prefix of the other.

    Both *ref* and *alt* must be non-empty (checked with ``assert``).
    """
    assert alt and ref, (pos, ref, alt)  # noqa PT018

    # Count matching leading characters; zip stops at the shorter string.
    shared = 0
    for ref_base, alt_base in zip(ref, alt):
        if ref_base != alt_base:
            break
        shared += 1

    return pos + shared, ref[shared:], alt[shared:]
def trim_str_right(pos: int, ref: str, alt: str) -> tuple[int, str, str]:
    """Strip the suffix shared by *ref* and *alt*; *pos* is returned as-is.

    Returns ``(pos, ref', alt')`` where the trailing characters on which
    the two strings agree have been removed from both. Either returned
    string may be empty when one input is a suffix of the other.

    Both *ref* and *alt* must be non-empty (checked with ``assert``).
    """
    assert alt, (pos, ref, alt)
    assert ref, (pos, ref, alt)

    # Count matching trailing characters, walking both strings backwards.
    shared = 0
    for ref_base, alt_base in zip(reversed(ref), reversed(alt)):
        if ref_base != alt_base:
            break
        shared += 1

    # Slice by explicit end index so that shared == 0 keeps the full string.
    return pos, ref[:len(ref) - shared], alt[:len(alt) - shared]
def trim_str_left_right(pos: int, ref: str, alt: str) -> tuple[int, str, str]:
    """Trim the shared prefix first, then the shared suffix.

    If *ref* or *alt* is empty the inputs are returned untouched. The
    right-hand trim is skipped when the left-hand trim has already emptied
    either string.
    """
    if ref and alt:
        pos, ref, alt = trim_str_left(pos, ref, alt)
    if ref and alt:
        pos, ref, alt = trim_str_right(pos, ref, alt)
    return pos, ref, alt
def trim_str_right_left(pos: int, ref: str, alt: str) -> tuple[int, str, str]:
    """Trim the shared suffix first, then the shared prefix.

    If *ref* or *alt* is empty the inputs are returned untouched. The
    left-hand trim is skipped when the right-hand trim has already emptied
    either string.
    """
    if ref and alt:
        pos, ref, alt = trim_str_right(pos, ref, alt)
    if ref and alt:
        pos, ref, alt = trim_str_left(pos, ref, alt)
    return pos, ref, alt
def trim_parsimonious(pos: int, ref: str, alt: str) -> tuple[int, str, str]:
    """Trim shared bases on both ends while keeping ref and alt non-empty.

    The shared suffix is removed first, then the shared prefix. Whenever a
    trim would leave *ref* or *alt* empty, one shared base is kept as an
    anchor instead:

    * after the right trim, the emptied side becomes the first base of its
      original string and the other side keeps one extra base from its
      original string (position unchanged);
    * after the left trim, the last shared prefix base is put back in
      front of both sides and the position is moved back by one.

    Both *ref* and *alt* must be non-empty (checked with ``assert``).
    """
    assert alt, (pos, ref, alt)
    assert ref, (pos, ref, alt)

    right_pos, right_ref, right_alt = trim_str_right(pos, ref, alt)

    if not right_ref or not right_alt:
        # One string is a suffix of the other: re-anchor on the base that
        # follows the remaining part. An emptied ref takes precedence.
        if not right_ref:
            anchored_ref = ref[:1]
            anchored_alt = alt[:len(right_alt) + 1]
        else:
            anchored_ref = ref[:len(right_ref) + 1]
            anchored_alt = alt[:1]
        assert anchored_alt[-1] == anchored_ref[-1]
        return right_pos, anchored_ref, anchored_alt

    left_pos, left_ref, left_alt = trim_str_left(
        right_pos, right_ref, right_alt)

    if not left_ref:
        # ref was entirely shared prefix: anchor on the base just before
        # the remaining alt bases.
        anchor = -len(left_alt) - 1
        return left_pos - 1, right_alt[anchor], right_alt[anchor:]

    if not left_alt:
        # alt was entirely shared prefix: anchor on the base just before
        # the remaining ref bases.
        anchor = -len(left_ref) - 1
        return left_pos - 1, right_ref[anchor:], right_ref[anchor]

    return left_pos, left_ref, left_alt