Source code for dae.annotation.gene_set_annotator

import logging
from typing import Any

from dae.annotation.annotatable import Annotatable
from dae.annotation.annotation_config import (
    AnnotationConfigParser,
    AnnotatorInfo,
)
from dae.annotation.annotation_pipeline import (
    AnnotationPipeline,
    Annotator,
)
from dae.annotation.annotator_base import AnnotatorBase
from dae.gene_sets.gene_sets_db import build_gene_set_collection_from_resource
from dae.genomic_resources import GenomicResource

logger = logging.getLogger(__name__)


def build_gene_set_annotator(
    pipeline: AnnotationPipeline,
    info: AnnotatorInfo,
) -> Annotator:
    """Create a gene set annotator.

    Args:
        pipeline: The annotation pipeline the annotator is built for. Its
            repository is used to look up the gene set resource, and
            ``get_attribute_info`` is used to validate the input attribute.
        info: The annotator configuration. The ``resource_id`` and
            ``input_gene_list`` parameters are read from
            ``info.parameters``.

    Returns:
        A ``GeneSetAnnotator`` bound to the resolved gene set resource and
        the name of the input gene list attribute.

    Raises:
        ValueError: If ``resource_id`` is missing or empty, the resource
            cannot be found in the pipeline repository, ``input_gene_list``
            is missing, or the referenced attribute is not provided by the
            pipeline or is not of type ``object``.
    """
    # Use ``.get`` so that a missing key is reported with the same
    # ValueError as an empty value, instead of escaping as a bare KeyError.
    gene_set_resource_id = info.parameters.get("resource_id")
    if not gene_set_resource_id:
        raise ValueError(f"The {info} needs a 'resource_id' parameter.")

    gene_set_resource = pipeline.repository.get_resource(
        gene_set_resource_id)
    if gene_set_resource is None:
        raise ValueError(f"The {gene_set_resource_id} is not available.")

    input_gene_list = info.parameters.get("input_gene_list")
    if input_gene_list is None:
        # Report the offending annotator configuration (``info``); the
        # builtin ``input`` function must not end up in the message.
        raise ValueError(
            f"The {info} must have an 'input_gene_list' parameter")

    input_gene_list_info = pipeline.get_attribute_info(input_gene_list)
    if input_gene_list_info is None:
        raise ValueError(
            f"The {input_gene_list} is not provided by the pipeline.")
    if input_gene_list_info.type != "object":
        raise ValueError(
            f"The {input_gene_list} provided by the pipeline "
            "is not of type object.")

    return GeneSetAnnotator(
        pipeline, info, gene_set_resource, input_gene_list,
    )
class GeneSetAnnotator(AnnotatorBase):
    """Gene set annotator class.

    For every gene set of the configured collection the annotator produces
    a boolean attribute named after the gene set, telling whether the input
    gene list shares at least one gene with that set. It also produces an
    ``in_sets`` attribute listing the names of all matching gene sets.
    """

    def __init__(
        self,
        pipeline: AnnotationPipeline | None,
        info: AnnotatorInfo,
        gene_set_resource: GenomicResource,
        input_gene_list: str,
    ):
        """Build the annotator from a gene set collection resource.

        Args:
            pipeline: The owning annotation pipeline, or ``None``.
            info: The annotator configuration. It is updated in place: the
                resource is added to ``info.resources``, ``info.attributes``
                is filled with all available attributes when empty, and
                ``info.documentation`` is set.
            gene_set_resource: The genomic resource holding the gene set
                collection.
            input_gene_list: Name of the context attribute that holds the
                list of genes to test against the gene sets.
        """
        self.gene_set_resource = gene_set_resource
        self.gene_set_collection = build_gene_set_collection_from_resource(
            self.gene_set_resource)
        self.gene_sets = self.gene_set_collection.get_all_gene_sets()
        info.resources += [gene_set_resource]

        # Reuse the gene sets fetched above instead of querying the
        # collection a second time.
        attrs = {
            gene_set.name: ("bool", gene_set.desc)
            for gene_set in self.gene_sets
        }
        attrs["in_sets"] = (
            "object",
            (
                "List of gene sets of the collection, "
                "which have at least 1 gene from the input gene list"
            ),
        )

        # With no attributes configured explicitly, expose all of them.
        if not info.attributes:
            info.attributes = AnnotationConfigParser.parse_raw_attributes(
                list(attrs))

        self.input_gene_list = input_gene_list
        # The trailing space keeps "the" separated from the bold
        # collection id in the generated documentation.
        info.documentation = (
            "This annotator uses the "
            f"**{self.gene_set_collection.collection_id}**"
        )
        super().__init__(pipeline, info, attrs)

    @property
    def used_context_attributes(self) -> tuple[str, ...]:
        """Return the names of the context attributes this annotator reads."""
        return (self.input_gene_list,)

    def _do_annotate(
        self, _: Annotatable | None, context: dict[str, Any],
    ) -> dict[str, Any]:
        """Flag the gene sets that overlap with the input gene list.

        Args:
            _: The annotatable; unused by this annotator.
            context: The annotation context; the gene list is read from it
                under the ``input_gene_list`` attribute name.

        Returns:
            A dict mapping every gene set name to ``True``/``False`` plus
            an ``in_sets`` list with the names of the matching gene sets.
            When the context has no gene list, the result of
            ``self._empty_result()`` is returned instead.
        """
        genes = context.get(self.input_gene_list)
        if genes is None:
            return self._empty_result()

        genes_set = set(genes)
        in_sets: list[str] = []
        output: dict[str, Any] = {"in_sets": in_sets}
        for gs in self.gene_sets:
            # ``isdisjoint`` takes any iterable, so no temporary set has to
            # be built from ``gs.syms`` on every call.
            overlaps = not genes_set.isdisjoint(gs.syms)
            output[gs.name] = overlaps
            if overlaps:
                in_sets.append(gs.name)

        return output