dae.annotation package

Subpackages

dae.annotation.tests package

Submodules

dae.annotation.annotatable module

class dae.annotation.annotatable.Annotatable(chrom: str, pos: int, pos_end: int, annotatable_type: Type)[source]

Bases: object

Base class for annotatables used in annotation pipeline.

class Type(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]

Bases: Enum

Defines annotatable types.

COMPLEX = 5

LARGE_DELETION = 7

LARGE_DUPLICATION = 6

POSITION = 0

REGION = 1

SMALL_DELETION = 4

SMALL_INSERTION = 3

SUBSTITUTION = 2

static from_string(variant: str) → Type[source]: Construct annotatable type from string argument.

property chrom: str

property chromosome: str

property end_position: int

static from_string(value: str) → Annotatable[source]: Deserialize an Annotatable instance from a string value.

property pos: int

property pos_end: int

property position: int

static tokenize(value: str) → tuple[str, list[str]][source]

class dae.annotation.annotatable.CNVAllele(chrom: str, pos_begin: int, pos_end: int, cnv_type: Type)[source]

Bases: Annotatable

Defines copy number variants annotatable.

static from_string(value: str) → CNVAllele[source]: Deserialize an Annotatable instance from a string value.

class dae.annotation.annotatable.Position(chrom: str, pos: int)[source]

Bases: Annotatable

Annotatable class representing a single position in a chromosome.

static from_string(value: str) → Position[source]: Deserialize an Annotatable instance from a string value.

class dae.annotation.annotatable.Region(chrom: str, pos_begin: int, pos_end: int)[source]

Bases: Annotatable

Annotatable class representing a region in a chromosome.

static from_string(value: str) → Region[source]: Deserialize an Annotatable instance from a string value.

class dae.annotation.annotatable.VCFAllele(chrom: str, pos: int, ref: str, alt: str)[source]

Bases: Annotatable

Defines small variants annotatable.

property alt: str

property alternative: str

static from_string(value: str) → VCFAllele[source]: Deserialize an Annotatable instance from a string value.

property ref: str

property reference: str

dae.annotation.annotate_columns module

class dae.annotation.annotate_columns.AnnotateColumnsTool(raw_args: list[str] | None = None, gpf_instance: GPFInstance | None = None)[source]

Bases: AnnotationTool

Annotation tool for TSV-style text files.

static annotate(args: Namespace, pipeline_config: list[dict[str, Any]], grr_definition: dict | None, ref_genome_id: str | None, out_file_path: str, region: tuple = (), compress_output: bool = False) → None[source]: Annotate a variants file with a given pipeline configuration.

static batch_annotate(args: Namespace, pipeline: AnnotationPipeline, line_iterator: Iterable, header_columns: list[str], record_to_annotatable: RecordToAnnotable, batch_work_dir: str | None = None) → Generator[list[str], None, None][source]: Annotate given lines as a batch.

get_argument_parser() → ArgumentParser[source]: Configure argument parser.

static single_annotate(args: Namespace, pipeline: AnnotationPipeline, line_iterator: Iterable, header_columns: list[str], record_to_annotatable: RecordToAnnotable) → Generator[list[str], None, None][source]: Annotate given lines one by one.

work() → None[source]

dae.annotation.annotate_columns.cli(raw_args: list[str] | None = None) → None[source]

dae.annotation.annotate_columns.combine(args: Any, pipeline_config: list[dict[str, Any]] | RawFullConfig, grr_definition: dict | None, ref_genome_id: str | None, partfile_paths: list[str], out_file_path: str) → None[source]: Combine annotated region parts into a single output file.

dae.annotation.annotate_columns.produce_tabix_index(filepath: str, args: Any, header: list[str], ref_genome: ReferenceGenome | None) → None[source]: Produce a tabix index file for the given variants file.

dae.annotation.annotate_columns.read_input(args: Any, region: tuple = ()) → tuple[Any, Any, list[str]][source]

Return a file object, line iterator and list of header columns.

Handles differences between tabixed and non-tabixed input files.

dae.annotation.annotate_doc module

dae.annotation.annotate_doc.cli(raw_args: list[str] | None = None) → None[source]: Run command line interface for annotate_doc tool.

dae.annotation.annotate_doc.configure_argument_parser() → ArgumentParser[source]: Construct and configure argument parser.

dae.annotation.annotate_schema2_parquet module

class dae.annotation.annotate_schema2_parquet.AnnotateSchema2ParquetTool(raw_args: list[str] | None = None, gpf_instance: GPFInstance | None = None)[source]

Bases: AnnotationTool

Annotation tool for the Parquet file format.

static annotate(input_dir: str, output_dir: str, pipeline_config: list[dict[str, Any]], region: str, grr_definition: dict, bucket_idx: int, allow_repeated_attributes: bool) → None[source]: Run annotation over a given directory of Parquet files.

get_argument_parser() → ArgumentParser[source]: Construct and configure argument parser.

work() → None[source]

dae.annotation.annotate_schema2_parquet.cli(raw_args: list[str] | None = None, gpf_instance: GPFInstance | None = None) → None[source]

dae.annotation.annotate_utils module

class dae.annotation.annotate_utils.AnnotationTool(raw_args: list[str] | None = None, gpf_instance: GPFInstance | None = None)[source]

Bases: object

Base class for annotation tools. Format-agnostic.

abstract get_argument_parser() → ArgumentParser[source]

run() → None[source]: Construct annotation tasks and process them.

abstract work() → None[source]

dae.annotation.annotate_utils.produce_partfile_paths(input_file_path: str, regions: list[tuple[str, int, int]], work_dir: str) → list[str][source]: Produce a list of file paths for output region part files.

dae.annotation.annotate_utils.produce_regions(pysam_file: TabixFile, region_size: int) → list[tuple[str, int, int]][source]: Given a region size, produce contig regions to annotate by.

dae.annotation.annotate_vcf module

class dae.annotation.annotate_vcf.AnnotateVCFTool(raw_args: list[str] | None = None, gpf_instance: GPFInstance | None = None)[source]

Bases: AnnotationTool

Annotation tool for the VCF file format.

static annotate(input_file: str, region: tuple[str, int, int] | None, pipeline_config: list[dict[str, Any]], grr_definition: dict | None, out_file_path: str, allow_repeated_attributes: bool = False, pipeline_config_old: str | None = None) → None[source]: Annotate a region from a given input VCF file using a pipeline.

get_argument_parser() → ArgumentParser[source]: Construct and configure argument parser.

work() → None[source]

dae.annotation.annotate_vcf.cli(raw_args: list[str] | None = None) → None[source]

dae.annotation.annotate_vcf.combine(input_file_path: str, pipeline_config: list[dict[str, Any]], grr_definition: dict | None, partfile_paths: list[str], output_file_path: str) → None[source]: Combine annotated region parts into a single VCF file.

dae.annotation.annotate_vcf.update_header(variant_file: VariantFile, pipeline: AnnotationPipeline | ReannotationPipeline) → None[source]: Update a variant file’s header with annotation pipeline scores.

dae.annotation.annotation_config module

class dae.annotation.annotation_config.AnnotationConfigParser[source]

Bases: object

Parser for annotation configuration.

static has_wildcard(string: str) → bool[source]: Ascertain whether a string contains a valid wildcard.

static match_labels_query(query: dict[str, str], resource_labels: dict[str, str]) → bool[source]: Check if the labels query for a wildcard matches.

static parse_complete(raw: dict[str, Any], idx: int) → AnnotatorInfo[source]: Parse a full-form annotation config.

static parse_minimal(raw: str, idx: int) → AnnotatorInfo[source]: Parse a minimal-form annotation config.

static parse_raw(pipeline_raw_config: list[dict[str, Any]] | RawFullConfig | None, grr: GenomicResourceRepo | None = None) → tuple[AnnotationPreambule | None, list[AnnotatorInfo]][source]: Parse raw dictionary annotation pipeline configuration.

static parse_raw_attribute_config(raw_attribute_config: dict[str, Any]) → AttributeInfo[source]: Parse annotation attribute raw configuration.

static parse_raw_attributes(raw_attributes_config: Any) → list[AttributeInfo][source]: Parse annotator pipeline attribute configuration.

static parse_short(raw: dict[str, Any], idx: int, grr: GenomicResourceRepo | None = None) → list[AnnotatorInfo][source]: Parse a short-form annotation config.

static parse_str(content: str, source_file_name: str | None = None, grr: GenomicResourceRepo | None = None) → tuple[AnnotationPreambule | None, list[AnnotatorInfo]][source]: Parse annotation pipeline configuration string.

static query_resources(annotator_type: str, wildcard: str, grr: GenomicResourceRepo) → list[str][source]: Collect resources matching a given query.

exception dae.annotation.annotation_config.AnnotationConfigurationError[source]: Bases: ValueError

class dae.annotation.annotation_config.AnnotationPreambule(summary: str, description: str, input_reference_genome: str, input_reference_genome_res: dae.genomic_resources.repository.GenomicResource | None, metadata: dict[str, Any])[source]

Bases: object

description: str

input_reference_genome: str

input_reference_genome_res: GenomicResource | None

metadata: dict[str, Any]

summary: str

class dae.annotation.annotation_config.AnnotatorInfo(_type: str, attributes: list[AttributeInfo], parameters: ParamsUsageMonitor | dict[str, Any], documentation: str = '', resources: list[GenomicResource] | None = None, annotator_id: str = 'N/A')[source]

Bases: object

Defines annotator configuration.

annotator_id: str

attributes: list[AttributeInfo]

documentation: str = ''

parameters: ParamsUsageMonitor

resources: list[GenomicResource]

type: str

class dae.annotation.annotation_config.AttributeInfo(name: str, source: str, internal: bool, parameters: ParamsUsageMonitor | dict[str, Any], _type: str = 'str', description: str = '', documentation: str | None = None)[source]

Bases: object

Defines annotation attribute configuration.

description: str = ''

property documentation: str

internal: bool

name: str

parameters: ParamsUsageMonitor

source: str

type: str = 'str'

class dae.annotation.annotation_config.ParamsUsageMonitor(data: dict[str, Any])[source]

Bases: Mapping

Class to monitor usage of annotator parameters.

get_unused_keys() → set[str][source]

get_used_keys() → set[str][source]

class dae.annotation.annotation_config.RawFullConfig[source]

Bases: TypedDict

annotators: list[dict[str, Any]]

preambule: RawPreambule

class dae.annotation.annotation_config.RawPreambule[source]

Bases: TypedDict

description: str

input_reference_genome: str

metadata: dict[str, Any]

summary: str

dae.annotation.annotation_factory module

Factory for creation of annotation pipeline.

dae.annotation.annotation_factory.build_annotation_pipeline(config: list[dict[str, Any]] | RawFullConfig, grr: GenomicResourceRepo, *, allow_repeated_attributes: bool = False, work_dir: Path | None = None) → AnnotationPipeline[source]: Build an annotation pipeline.

dae.annotation.annotation_factory.check_for_repeated_attributes_in_annotator(annotator_config: AnnotatorInfo) → None[source]: Check for repeated attributes in annotator configuration.

dae.annotation.annotation_factory.check_for_repeated_attributes_in_pipeline(pipeline: AnnotationPipeline, *, allow_repeated_attributes: bool = False) → None[source]: Check for repeated attributes in pipeline configuration.

dae.annotation.annotation_factory.check_for_unused_parameters(info: AnnotatorInfo) → None[source]: Check annotator configuration for unused parameters.

dae.annotation.annotation_factory.copy_annotation_pipeline(pipeline: AnnotationPipeline) → AnnotationPipeline[source]: Copy an annotation pipeline instance.

dae.annotation.annotation_factory.copy_reannotation_pipeline(pipeline: ReannotationPipeline) → ReannotationPipeline[source]: Copy a reannotation pipeline instance.

dae.annotation.annotation_factory.get_annotator_factory(annotator_type: str) → Callable[[AnnotationPipeline, AnnotatorInfo], Annotator][source]

Find and return a factory function for creation of an annotator type.

If the specified annotator type is not found, this function raises a ValueError exception.

Returns: the annotator factory for the specified annotator type.
Raises: ValueError – when an annotator factory for the specified annotator type cannot be found.

dae.annotation.annotation_factory.get_available_annotator_types() → list[str][source]: Return the list of all registered annotator factory types.

dae.annotation.annotation_factory.load_pipeline_from_file(raw_path: str, grr: GenomicResourceRepo, *, allow_repeated_attributes: bool = False, work_dir: Path | None = None) → AnnotationPipeline[source]: Load an annotation pipeline from a configuration file.

dae.annotation.annotation_factory.load_pipeline_from_yaml(raw: str, grr: GenomicResourceRepo, *, allow_repeated_attributes: bool = False, work_dir: Path | None = None) → AnnotationPipeline[source]: Load an annotation pipeline from a YAML-formatted string.

dae.annotation.annotation_factory.register_annotator_factory(annotator_type: str, factory: Callable[[AnnotationPipeline, AnnotatorInfo], Annotator]) → None[source]

Register additional annotator factory.

By default all genotype storage factories should be registered at the [dae.genotype_storage.factories] extension point. All registered factories are loaded automatically. This function should be used if you want to bypass the extension point mechanism and register an additional genotype storage factory programmatically.

dae.annotation.annotation_factory.resolve_repeated_attributes(pipeline: AnnotationPipeline, repeated_attributes: set[str]) → None[source]: Resolve repeated attributes in pipeline configuration via renaming.

dae.annotation.annotation_pipeline module

Provides annotation pipeline class.

class dae.annotation.annotation_pipeline.AnnotationPipeline(repository: GenomicResourceRepo)[source]

Bases: object

Provides annotation pipeline abstraction.

add_annotator(annotator: Annotator) → None[source]

annotate(annotatable: Annotatable, context: dict | None = None) → dict[source]: Apply all annotators to an annotatable.

batch_annotate(annotatables: list[Annotatable | None], contexts: list[dict] | None = None, batch_work_dir: str | None = None) → list[dict][source]: Apply all annotators to a list of annotatables.

close() → None[source]: Close the annotation pipeline.

get_annotator_by_attribute_info(attribute_info: AttributeInfo) → Annotator | None[source]

get_attribute_info(attribute_name: str) → AttributeInfo | None[source]

get_attributes() → list[AttributeInfo][source]

get_info() → list[AnnotatorInfo][source]

get_resource_ids() → set[str][source]

open() → AnnotationPipeline[source]: Open all annotators in the pipeline and mark it as open.

class dae.annotation.annotation_pipeline.Annotator(pipeline: AnnotationPipeline | None, info: AnnotatorInfo)[source]

Bases: ABC

Annotator provides a set of attributes for a given Annotatable.

abstract annotate(annotatable: Annotatable | None, context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

property attributes: list[AttributeInfo]

batch_annotate(annotatables: list[Annotatable | None], contexts: list[dict[str, Any]], batch_work_dir: str | None = None) → Iterable[dict[str, Any]][source]

close() → None[source]

get_info() → AnnotatorInfo[source]

is_open() → bool[source]

open() → Annotator[source]

property resource_ids: set[str]

property resources: list[GenomicResource]

property used_context_attributes: tuple[str, ...]

class dae.annotation.annotation_pipeline.AnnotatorDecorator(child: Annotator)[source]

Bases: Annotator

Defines annotator decorator base class.

close() → None[source]

is_open() → bool[source]

open() → Annotator[source]

class dae.annotation.annotation_pipeline.InputAnnotableAnnotatorDecorator(child: Annotator)[source]

Bases: AnnotatorDecorator

Defines annotator decorator to use input annotatable if defined.

annotate(_: Annotatable | None, context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

static decorate(child: Annotator) → Annotator[source]

property used_context_attributes: tuple[str, ...]

class dae.annotation.annotation_pipeline.ReannotationPipeline(pipeline_new: AnnotationPipeline, pipeline_old: AnnotationPipeline)[source]

Bases: AnnotationPipeline

Special pipeline that handles reannotation of a previous pipeline.

AnnotationDependencyGraph: alias of dict[AnnotatorInfo, list[tuple[AnnotatorInfo, AttributeInfo]]]

annotate(annotatable: Annotatable, record: dict) → dict[source]: Apply all annotators to an annotatable.

annotate_summary_allele(allele: SummaryAllele) → dict[source]

static build_dependency_graph(pipeline: AnnotationPipeline) → AnnotationDependencyGraph[source]: Make dependency graph for an annotation pipeline.

get_attributes() → list[AttributeInfo][source]

get_dependencies_for(info: AnnotatorInfo) → set[AnnotatorInfo][source]: Get all dependencies for a given annotator.

get_dependents_for(info: AnnotatorInfo) → set[AnnotatorInfo][source]: Get all dependents for a given annotator.

class dae.annotation.annotation_pipeline.ValueTransformAnnotatorDecorator(child: Annotator, value_transformers: dict[str, Callable[[Any], Any]])[source]

Bases: AnnotatorDecorator

Define value transformer annotator decorator.

annotate(annotatable: Annotatable | None, context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

static decorate(child: Annotator) → Annotator[source]: Apply value transform decorator to an annotator.

dae.annotation.annotator_base module

Provides base class for annotators.

class dae.annotation.annotator_base.AnnotatorBase(pipeline: AnnotationPipeline | None, info: AnnotatorInfo, source_type_desc: dict[str, tuple[str, str]])[source]

Bases: Annotator

Base implementation of the Annotator class.

annotate(annotatable: Annotatable | None, context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

batch_annotate(annotatables: list[Annotatable | None], contexts: list[dict[str, Any]], batch_work_dir: str | None = None) → list[dict[str, Any]][source]

dae.annotation.cnv_collection_annotator module

class dae.annotation.cnv_collection_annotator.CnvCollectionAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: Annotator

CNV collection annotator class.

annotate(annotatable: Annotatable | None, _: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

close() → None[source]

open() → Annotator[source]

dae.annotation.cnv_collection_annotator.build_cnv_collection_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation.context module

class dae.annotation.context.CLIAnnotationContext(context_objects: dict[str, Any], source: tuple[str, ...])[source]

Bases: CLIGenomicContext

Defines annotation pipeline genomics context.

static add_context_arguments(parser: ArgumentParser) → None[source]: Add command line arguments to the argument parser.

static context_builder(args: Namespace) → CLIAnnotationContext[source]: Build a CLI genomic context.

static get_pipeline(context: GenomicContext) → AnnotationPipeline[source]: Construct an annotation pipeline.

static register(args: Namespace) → None[source]

Register a virtual subclass of an ABC.

Returns the subclass, to allow usage as a class decorator.

dae.annotation.debug_annotator module

class dae.annotation.debug_annotator.HelloWorldAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: Annotator

Defines example annotator.

annotate(_annotatable: Annotatable | None, _context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

dae.annotation.debug_annotator.build_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]: Create an example hello world annotator.

dae.annotation.effect_annotator module

class dae.annotation.effect_annotator.EffectAnnotatorAdapter(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: AnnotatorBase

Adapts effect annotator to be used in annotation infrastructure.

close() → None[source]

open() → Annotator[source]

dae.annotation.effect_annotator.build_effect_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation.gene_score_annotator module

Module containing the gene score annotator.

class dae.annotation.gene_score_annotator.GeneScoreAnnotator(pipeline: AnnotationPipeline | None, info: AnnotatorInfo, gene_score_resource: GenomicResource, input_gene_list: str)[source]

Bases: Annotator

Gene score annotator class.

DEFAULT_AGGREGATOR_TYPE = 'dict'

aggregate_gene_values(score_id: str, gene_symbols: list[str], aggregator_type: str) → Any[source]: Aggregate gene score values.

annotate(_: Annotatable | None, context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

property used_context_attributes: tuple[str, ...]

dae.annotation.gene_score_annotator.build_gene_score_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]: Create a gene score annotator.

dae.annotation.gene_set_annotator module

class dae.annotation.gene_set_annotator.GeneSetAnnotator(pipeline: AnnotationPipeline | None, info: AnnotatorInfo, gene_set_resource: GenomicResource, gene_set_id: str, input_gene_list: str)[source]

Bases: Annotator

Gene set annotator class.

annotate(_: Annotatable | None, context: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

property used_context_attributes: tuple[str, ...]

dae.annotation.gene_set_annotator.build_gene_set_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]: Create a gene set annotator.

dae.annotation.liftover_annotator module

Provides a lift over annotator and helpers.

class dae.annotation.liftover_annotator.AbstractLiftoverAnnotator(pipeline: AnnotationPipeline | None, info: AnnotatorInfo, chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome)[source]

Bases: AnnotatorBase

Liftover annotator class.

close() → None[source]

liftover_allele(allele: VCFAllele) → VCFAllele | None[source]: Liftover an allele.

liftover_cnv(cnv_allele: Annotatable) → Annotatable | None[source]: Liftover CNV allele annotatable.

liftover_position(position: Annotatable) → Annotatable | None[source]: Liftover position annotatable.

liftover_region(region: Annotatable) → Annotatable | None[source]: Liftover region annotatable.

open() → Annotator[source]

class dae.annotation.liftover_annotator.BasicLiftoverAnnotator(pipeline: AnnotationPipeline | None, info: AnnotatorInfo, chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome)[source]

Bases: AbstractLiftoverAnnotator

Basic liftover annotator class.

class dae.annotation.liftover_annotator.BcfLiftoverAnnotator(pipeline: AnnotationPipeline | None, info: AnnotatorInfo, chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome)[source]

Bases: AbstractLiftoverAnnotator

BCF tools liftover re-implementation annotator class.

dae.annotation.liftover_annotator.basic_liftover_allele(chrom: str, pos: int, ref: str, alt: str, liftover_chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome) → tuple[str, int, str, str] | None[source]: Basic liftover an allele.

dae.annotation.liftover_annotator.basic_liftover_variant(chrom: str, pos: int, ref: str, alts: list[str], liftover_chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome) → tuple[str, int, str, list[str]] | None[source]: Basic liftover variant utility function.

dae.annotation.liftover_annotator.bcf_liftover_allele(chrom: str, pos: int, ref: str, alt: str, liftover_chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome) → tuple[str, int, str, str] | None[source]: Liftover a variant.

dae.annotation.liftover_annotator.bcf_liftover_variant(chrom: str, pos: int, ref: str, alts: list[str], liftover_chain: LiftoverChain, source_genome: ReferenceGenome, target_genome: ReferenceGenome) → tuple[str, int, str, list[str]] | None[source]: BCF liftover variant utility function.

dae.annotation.liftover_annotator.build_liftover_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]: Create a liftover annotator.

dae.annotation.normalize_allele_annotator module

Provides normalize allele annotator and helpers.

class dae.annotation.normalize_allele_annotator.NormalizeAlleleAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: AnnotatorBase

Annotator to normalize VCF alleles.

close() → None[source]

open() → Annotator[source]

dae.annotation.normalize_allele_annotator.build_normalize_allele_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation.normalize_allele_annotator.normalize_allele(allele: VCFAllele, genome: ReferenceGenome) → VCFAllele[source]

Normalize an allele.

Uses the algorithm defined at https://genome.sph.umich.edu/wiki/Variant_Normalization

dae.annotation.record_to_annotatable module

class dae.annotation.record_to_annotatable.CSHLAlleleRecordToAnnotatable(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: RecordToAnnotable

Transform a CSHL variant record into a VCF allele annotatable.

build(record: dict[str, str]) → Annotatable[source]

class dae.annotation.record_to_annotatable.RecordToAnnotable(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: ABC

abstract build(record: dict[str, str]) → Annotatable[source]

class dae.annotation.record_to_annotatable.RecordToCNVAllele(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: RecordToAnnotable

Transform a columns record into a CNV allele annotatable.

build(record: dict[str, str]) → Annotatable[source]

class dae.annotation.record_to_annotatable.RecordToPosition(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: RecordToAnnotable

build(record: dict[str, str]) → Annotatable[source]

class dae.annotation.record_to_annotatable.RecordToRegion(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: RecordToAnnotable

build(record: dict[str, str]) → Annotatable[source]

class dae.annotation.record_to_annotatable.RecordToVcfAllele(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: RecordToAnnotable

build(record: dict[str, str]) → Annotatable[source]

class dae.annotation.record_to_annotatable.VcfLikeRecordToVcfAllele(columns: tuple, ref_genome: ReferenceGenome | None)[source]

Bases: RecordToAnnotable

Transform a columns record into VCF allele annotatable.

build(record: dict[str, str]) → Annotatable[source]

dae.annotation.record_to_annotatable.add_record_to_annotable_arguments(parser: ArgumentParser) → None[source]

dae.annotation.record_to_annotatable.build_record_to_annotatable(parameters: dict[str, str], available_columns: set[str], ref_genome: ReferenceGenome | None = None) → RecordToAnnotable[source]: Transform a variant record into an annotatable.

dae.annotation.score_annotator module

This contains the implementation of the three score annotators.

Genomic score annotators defined are position_score, np_score, and allele_score.

class dae.annotation.score_annotator.AlleleScoreAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: GenomicScoreAnnotatorBase

This class implements allele_score annotator.

annotate(annotatable: Annotatable | None, _: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

build_score_aggregator_documentation(attr_info: AttributeInfo) → list[str][source]: Collect score aggregator documentation.

class dae.annotation.score_annotator.GenomicScoreAnnotatorBase(pipeline: AnnotationPipeline, info: AnnotatorInfo, score: GenomicScore)[source]

Bases: Annotator

Genomic score base annotator.

add_score_aggregator_documentation(attribute_info: AttributeInfo, aggregator: str, attribute_conf_agg: str | None) → None[source]: Collect score aggregator documentation.

abstract build_score_aggregator_documentation(attr_info: AttributeInfo) → list[str][source]: Construct score aggregator documentation.

close() → None[source]

is_open() → bool[source]

open() → Annotator[source]

class dae.annotation.score_annotator.NPScoreAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: PositionScoreAnnotatorBase

This class implements np_score annotator.

build_score_aggregator_documentation(attr_info: AttributeInfo) → list[str][source]: Collect score aggregator documentation.

class dae.annotation.score_annotator.PositionScoreAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: PositionScoreAnnotatorBase

This class implements the position_score annotator.

The position_score annotator requires the resource_id parameter, whose value must be an id of a genomic resource of type position_score.

The position_score resource provides a set of scores (see …) that the position_score annotator uses as attributes to assign to the annotatable.

The position_score annotator recognizes one attribute-level parameter called position_aggregator that controls how the position scores are aggregated for annotatables that refer to a region of the reference genome.

build_score_aggregator_documentation(attr_info: AttributeInfo) → list[str][source]: Collect score aggregator documentation.

class dae.annotation.score_annotator.PositionScoreAnnotatorBase(pipeline: AnnotationPipeline, info: AnnotatorInfo, score: GenomicScore)[source]

Bases: GenomicScoreAnnotatorBase

Defines position score base annotator class.

annotate(annotatable: Annotatable | None, _: dict[str, Any]) → dict[str, Any][source]: Produce annotation attributes for an annotatable.

dae.annotation.score_annotator.build_allele_score_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation.score_annotator.build_np_score_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation.score_annotator.build_position_score_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation.score_annotator.get_genomic_resource(pipeline: AnnotationPipeline, info: AnnotatorInfo, resource_type: str) → GenomicResource[source]: Return genomic score resource used for given genomic score annotator.

dae.annotation.simple_effect_annotator module

class dae.annotation.simple_effect_annotator.SimpleEffectAnnotator(pipeline: AnnotationPipeline, info: AnnotatorInfo)[source]

Bases: AnnotatorBase

Simple effect annotator class.

call_region(chrom: str, beg: int, end: int, transcripts: list[TranscriptModel], func_name: str, classification: str) → tuple[str, set[str]] | None[source]: Call a region with a specific classification.

cds_intron_regions(transcript: TranscriptModel) → list[Region][source]: Return whether region is CDS intron.

noncoding_regions(transcript: TranscriptModel) → list[Region][source]: Return whether the region is noncoding.

open() → Annotator[source]

peripheral_regions(transcript: TranscriptModel) → list[Region][source]: Return whether the region is peripheral.

run_annotate(chrom: str, beg: int, end: int) → tuple[str, set[str]][source]: Return classification with a set of affected genes.

utr_regions(transcript: TranscriptModel) → list[Region][source]: Return whether the region is classified as UTR.

dae.annotation.simple_effect_annotator.build_simple_effect_annotator(pipeline: AnnotationPipeline, info: AnnotatorInfo) → Annotator[source]

dae.annotation package

Subpackages

Submodules

dae.annotation.annotatable module

dae.annotation.annotate_columns module

dae.annotation.annotate_doc module

dae.annotation.annotate_schema2_parquet module

dae.annotation.annotate_utils module

dae.annotation.annotate_vcf module

dae.annotation.annotation_config module

dae.annotation.annotation_factory module

dae.annotation.annotation_pipeline module

dae.annotation.annotator_base module

dae.annotation.cnv_collection_annotator module

dae.annotation.context module

dae.annotation.debug_annotator module

dae.annotation.effect_annotator module

dae.annotation.gene_score_annotator module

dae.annotation.gene_set_annotator module

dae.annotation.liftover_annotator module

dae.annotation.normalize_allele_annotator module

dae.annotation.record_to_annotatable module

dae.annotation.score_annotator module

dae.annotation.simple_effect_annotator module

Module contents