def normalize_variant(
    chrom: str,
    pos: int,
    ref: str,
    alts: list[str],
    genome: ReferenceGenome,
) -> tuple[str, int, str, list[str]]:
    """Return the variant trimmed and left-aligned against the reference.

    Follows the procedure described at
    https://genome.sph.umich.edu/wiki/Variant_Normalization

    Phase 1 repeats until nothing changes:
      * if ``ref`` and every alt are non-empty and end with the same
        character, that character is dropped from all of them (unless the
        alleles are all the same single character, which is only logged
        as "no variant");
      * if any allele is empty and ``pos > 1``, the sequence returned by
        ``genome.get_sequence(chrom, pos - 1, pos - 1)`` is prepended to
        every allele and ``pos`` is decremented.

    Phase 2 drops shared leading characters while every allele would still
    keep at least one character, incrementing ``pos`` for each one dropped.

    Args:
        chrom: Chromosome name; passed through to ``genome`` unchanged.
        pos: Variant position (presumably 1-based, given the ``pos > 1``
            guard -- confirm against ``ReferenceGenome``).
        ref: Reference allele.
        alts: Alternative alleles.
        genome: Object providing ``get_sequence(chrom, start, stop)``.

    Returns:
        ``(chrom, pos, ref, alts)`` after normalization. At ``pos == 1``
        an empty allele cannot be padded and is returned as is.
    """
    progressed = True
    while progressed:
        progressed = False

        # Every allele is non-empty and they all share the last character.
        shared_tail = bool(ref) and all(
            allele and ref[-1] == allele[-1] for allele in alts
        )
        if shared_tail:
            logger.debug(
                "shrink from right: %s:%d%s>%s", chrom, pos, ref, alts)
            identical_single_base = len(ref) == 1 and all(
                allele == ref for allele in alts
            )
            if identical_single_base:
                # Shrinking further would empty every allele and walk the
                # variant leftwards forever; leave it alone.
                logger.info(
                    "no variant: %s:%d%s>%s", chrom, pos, ref, alts)
            else:
                ref = ref[:-1]
                alts = [allele[:-1] for allele in alts]
                progressed = True

        has_empty_allele = not ref or not all(alts)
        if pos > 1 and has_empty_allele:
            logger.debug(
                "moving left variant: %s:%d%s>%s", chrom, pos, ref, alts)
            pos -= 1
            anchor = genome.get_sequence(chrom, pos, pos)
            ref = f"{anchor}{ref}"
            alts = [f"{anchor}{allele}" for allele in alts]
            progressed = True

    # Count how many leading characters can go: stop one short of the
    # shortest allele so that nothing becomes empty.
    shortest = min(len(allele) for allele in (ref, *alts))
    lead = 0
    while lead < shortest - 1 and all(
        ref[lead] == allele[lead] for allele in alts
    ):
        lead += 1

    if lead:
        pos += lead
        ref = ref[lead:]
        alts = [allele[lead:] for allele in alts]

    return chrom, pos, ref, alts
def maximally_extend_variant(
    chrom: str,
    pos: int,
    ref: str,
    alts: list[str],
    genome: ReferenceGenome,
) -> tuple[str, int, str, list[str]]:
    """Normalize a variant, then pad it with flanking reference sequence.

    Steps, in order:
      1. The variant is passed through ``normalize_variant``.
      2. If some alt starts with a different character than ``ref``, the
         sequence at ``pos - 1`` is prepended to every allele and ``pos``
         is decremented.
      3. If some alt ends with a different character than ``ref``, the
         sequence just past the end of ``ref`` is appended to every allele.
      4. While any two neighbouring alleles in ``[ref, *alts]`` are such
         that the shorter one is a prefix or a suffix of the longer one,
         one more position to the right is appended to every allele. This
         stops when no such pair remains or ``genome.get_sequence`` returns
         something falsy (presumably the end of the sequence -- confirm
         against ``ReferenceGenome``).

    Args:
        chrom: Chromosome name; passed through to ``genome`` unchanged.
        pos: Variant position.
        ref: Reference allele.
        alts: Alternative alleles.
        genome: Object providing ``get_sequence(chrom, start, stop)``.

    Returns:
        ``(chrom, pos, ref, alts)`` for the extended variant.
    """
    chrom, pos, ref, alts = normalize_variant(chrom, pos, ref, alts, genome)

    if any(alt[0] != ref[0] for alt in alts):
        pos -= 1
        prefix = genome.get_sequence(chrom, pos, pos)
        ref = f"{prefix}{ref}"
        alts = [f"{prefix}{alt}" for alt in alts]

    if any(alt[-1] != ref[-1] for alt in alts):
        after = pos + len(ref)
        suffix = genome.get_sequence(chrom, after, after)
        ref = f"{ref}{suffix}"
        alts = [f"{alt}{suffix}" for alt in alts]

    while True:
        # Order each neighbouring pair as (shorter, longer); sorted() is
        # stable, so equal-length pairs keep their original order.
        ordered_pairs = (
            sorted(pair, key=len)
            for pair in itertools.pairwise([ref, *alts])
        )
        ambiguous = any(
            longer.startswith(shorter) or longer.endswith(shorter)
            for shorter, longer in ordered_pairs
        )
        if not ambiguous:
            break

        after = pos + len(ref)
        suffix = genome.get_sequence(chrom, after, after)
        if not suffix:
            # Nothing more to append, so the alleles cannot change again.
            break
        ref = f"{ref}{suffix}"
        alts = [f"{alt}{suffix}" for alt in alts]

    return chrom, pos, ref, alts