# Source code for dae.genomic_resources.genomic_context

from __future__ import annotations

import argparse
import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from functools import lru_cache
from typing import Any, Callable

from dae.genomic_resources.gene_models import (
    GeneModels,
    build_gene_models_from_resource,
)
from dae.genomic_resources.reference_genome import (
    ReferenceGenome,
    build_reference_genome_from_resource,
)
from dae.genomic_resources.repository import GenomicResourceRepo
from dae.genomic_resources.repository_factory import (
    build_genomic_resource_repository,
)

# Module-level registries. Providers are appended by
# register_context_provider(); contexts are inserted at the front by
# register_context(), so the most recently registered context comes first.
_REGISTERED_CONTEXT_PROVIDERS: list[GenomicContextProvider] = []
_REGISTERED_CONTEXTS: list[GenomicContext] = []

# Keys under which the well-known objects are stored in a genomic context.
GC_GRR_KEY = "genomic_resources_repository"
GC_REFERENCE_GENOME_KEY = "reference_genome"
GC_GENE_MODELS_KEY = "gene_models"

# Logger shared by everything defined in this module.
logger = logging.getLogger(__name__)


class GenomicContext(ABC):
    """Abstract base class for genomic context.

    A context exposes objects by string key through
    :meth:`get_context_object`. The typed getters below look up the
    well-known keys and verify the type of whatever the context returns.
    """

    def get_reference_genome(self) -> ReferenceGenome | None:
        """Return reference genome from context.

        Returns ``None`` when the context has nothing stored under the
        reference genome key.

        Raises:
            ValueError: the stored object is not a ``ReferenceGenome``.
        """
        candidate = self.get_context_object(GC_REFERENCE_GENOME_KEY)
        if candidate is None or isinstance(candidate, ReferenceGenome):
            return candidate
        raise ValueError(
            f"The context returned a wrong type for a reference genome: "
            f"{type(candidate)}")

    def get_gene_models(self) -> GeneModels | None:
        """Return gene models from context.

        Returns ``None`` when the context has nothing stored under the
        gene models key.

        Raises:
            ValueError: the stored object is not a ``GeneModels``.
        """
        candidate = self.get_context_object(GC_GENE_MODELS_KEY)
        if candidate is None or isinstance(candidate, GeneModels):
            return candidate
        raise ValueError(
            f"The context returned a wrong type for gene models: "
            f"{type(candidate)}")

    def get_genomic_resources_repository(
            self) -> GenomicResourceRepo | None:
        """Return genomic resources repository from context.

        Returns ``None`` when the context has nothing stored under the
        GRR key.

        Raises:
            ValueError: the stored object is not a ``GenomicResourceRepo``.
        """
        candidate = self.get_context_object(GC_GRR_KEY)
        if candidate is None or isinstance(candidate, GenomicResourceRepo):
            return candidate
        raise ValueError(
            f"The context returned a wrong type for GRR: "
            f"{type(candidate)}")

    @abstractmethod
    def get_context_object(self, key: str) -> Any | None:
        """Return a genomic context object corresponding to the passed key.

        If there is no such object returns None.
        """

    @abstractmethod
    def get_context_keys(self) -> set[str]:
        """Return set of all keys that could be found in the context."""

    @abstractmethod
    def get_source(self) -> tuple[str, ...]:
        """Return a tuple of strings that identifies the genomic context."""
class GenomicContextProvider(ABC):
    """Abstract base class for genomic contexts provider."""

    @abstractmethod
    def get_context_provider_priority(self) -> int:
        """Return the provider priority.

        ``get_genomic_context`` sorts providers in ascending order of this
        value, so contexts of providers with a smaller priority are
        consulted first.
        """

    @abstractmethod
    def get_context_provider_type(self) -> str:
        """Return a string naming the provider.

        Used in log messages and as the secondary sort key after the
        priority.
        """

    @abstractmethod
    def get_contexts(self) -> Iterable[GenomicContext]:
        """Return the genomic contexts supplied by this provider."""
class SimpleGenomicContext(GenomicContext):
    """Simple implementation of genomic context.

    Wraps a plain dictionary of context objects together with a source
    tuple that identifies where the context came from.
    """

    def __init__(
        self,
        context_objects: dict[str, Any],
        source: tuple[str, ...],
    ):
        self._source = source
        # The dictionary is stored as passed in (no copy is made).
        self._context: dict[str, Any] = context_objects

    def get_context_object(self, key: str) -> Any | None:
        """Return the object stored under ``key`` or None if absent."""
        return self._context.get(key, None)

    def get_context_keys(self) -> set[str]:
        """Return the set of keys present in the wrapped dictionary."""
        return set(self._context)

    def get_source(self) -> tuple[str, ...]:
        """Return the source tuple given at construction time."""
        return self._source

    def get_all_context_objects(self) -> dict[str, Any]:
        """Return the wrapped dictionary itself (not a copy)."""
        return self._context
class SimpleGenomicContextProvider(GenomicContextProvider):
    """Simple implementation of genomic contexts provider.

    The provider wraps a zero-argument builder that produces at most one
    genomic context. The builder is invoked on the first call to
    :meth:`get_contexts` and its outcome is remembered afterwards.
    """

    def __init__(
        self,
        context_builder: Callable[[], GenomicContext | None],
        provider_type: str,
        priority: int,
    ):
        self._context_builder = context_builder
        self._type: str = provider_type
        self._priority: int = priority
        # None means "builder not invoked yet".
        self._contexts: Iterable[GenomicContext] | None = None

    def get_context_provider_priority(self) -> int:
        """Return the priority given at construction time."""
        return self._priority

    def get_context_provider_type(self) -> str:
        """Return the provider type wrapped in a descriptive label."""
        return f"SingleGenomicContextProvider[{self._type}]"

    def get_contexts(self) -> Iterable[GenomicContext]:
        """Return the built context as a list with zero or one element.

        A builder that returns None or raises yields an empty list; the
        exception is logged and not propagated.
        """
        if self._contexts is not None:
            return self._contexts

        try:
            built = self._context_builder()
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "problem while building genomic context")
            built = None

        self._contexts = [] if built is None else [built]
        return self._contexts
def register_context_provider(
        context_provider: GenomicContextProvider) -> None:
    """Register genomic context provider.

    The provider is appended to the module-level list of registered
    providers after a debug message with its type and priority is logged.
    """
    provider_type = context_provider.get_context_provider_type()
    provider_priority = context_provider.get_context_provider_priority()
    logger.debug(
        "Registering the %s genomic context generator with priority %s",
        provider_type, provider_priority)
    _REGISTERED_CONTEXT_PROVIDERS.append(context_provider)
def register_context(context: GenomicContext) -> None:
    """Register a genomic context.

    The context is placed at the front of the module-level list of
    registered contexts, ahead of those registered earlier.
    """
    source = context.get_source()
    logger.debug("Registering the %s genomic context", source)
    _REGISTERED_CONTEXTS[:0] = [context]
class PriorityGenomicContext(GenomicContext):
    """Defines a priority genomic context.

    Wraps an ordered collection of genomic contexts. Lookups consult the
    wrapped contexts in order and return the first object found, so
    contexts that come earlier take precedence over later ones.
    """

    def __init__(self, contexts: Iterable[GenomicContext]):
        # Materialize once: the contexts are iterated on every lookup, so a
        # one-shot iterable (e.g. a generator) would be exhausted by the
        # logging loop below and would also always test as truthy.
        self.contexts: list[GenomicContext] = list(contexts)
        if self.contexts:
            logger.info("Using the following genomic context:")
            for context in self.contexts:
                logger.info("\t%s", context.get_source())
        else:
            logger.info("No genomic contexts are available.")

    def get_context_object(self, key: str) -> Any | None:
        """Return the first object found under ``key`` or None.

        Contexts are queried in order. Only ``None`` is treated as
        "not found", so a falsy object (for example an empty container)
        stored in an earlier context is still returned.
        """
        for context in self.contexts:
            obj = context.get_context_object(key)
            if obj is not None:
                logger.info(
                    "object with key %s found in the context %s",
                    key, context.get_source())
                return obj
        return None

    def get_context_keys(self) -> set[str]:
        """Return the union of the keys of all wrapped contexts."""
        result: set[str] = set()
        for context in self.contexts:
            result.update(context.get_context_keys())
        return result

    def get_source(self) -> tuple[str, ...]:
        """Return a tuple identifying this context and the wrapped ones.

        The first element is the literal ``"PriorityGenomicContext"``; it is
        followed by the string form of each wrapped context's source.
        """
        # Deliberately not memoized with lru_cache: a cache on an instance
        # method is shared by the class, keeps every instance alive while
        # the entry is cached, and would return a stale tuple if
        # ``self.contexts`` were modified later.
        return (
            "PriorityGenomicContext",
            *(str(context.get_source()) for context in self.contexts),
        )
def get_genomic_context() -> GenomicContext:
    """Build a priority genomic context from everything registered.

    Explicitly registered contexts come first, followed by the contexts of
    the registered providers taken in ascending order of
    (priority, provider type).
    """
    def _provider_order(
            provider: GenomicContextProvider) -> tuple[int, str]:
        return (
            provider.get_context_provider_priority(),
            provider.get_context_provider_type(),
        )

    contexts = list(_REGISTERED_CONTEXTS)
    ordered_providers = sorted(
        _REGISTERED_CONTEXT_PROVIDERS, key=_provider_order)
    for provider in ordered_providers:
        contexts += provider.get_contexts()
    return PriorityGenomicContext(contexts)
class CLIGenomicContext(SimpleGenomicContext):
    """Defines CLI genomics context.

    The context is built from command line arguments declared by
    :meth:`add_context_arguments`.
    """

    @staticmethod
    def add_context_arguments(parser: argparse.ArgumentParser) -> None:
        """Add command line arguments to the argument parser."""
        parser.add_argument(
            "-g", "--grr-filename", "--grr", default=None,
            help="The GRR configuration file. If the argument is absent, "
            "a GRR repository from the current genomic context "
            "(i.e. gpf_instance) will be used or, if that fails, the "
            "default GRR configuration will be used.")
        parser.add_argument(
            "--grr-directory", default=None,
            help="Local GRR directory to use as repository.")
        parser.add_argument(
            "-R", "--reference-genome-resource-id", "--ref", default=None,
            help="The resource id for the reference genome. If the argument "
            "is absent the reference genome from the current genomic "
            "context will be used.")
        parser.add_argument(
            "-G", "--gene-models-resource-id", "--genes", default=None,
            help="The resource id for the gene models. If the argument"
            " is absent the gene models from the current genomic "
            "context will be used.")

    @staticmethod
    def register(args: argparse.Namespace) -> None:
        """Build a CLI genomic context from ``args`` and register it."""
        context = CLIGenomicContext.context_builder(args)
        register_context(context)

    @staticmethod
    def _resolve_grr(args: argparse.Namespace) -> Any | None:
        """Pick the GRR requested on the command line, if any.

        ``--grr-filename`` takes precedence over ``--grr-directory``. When
        neither is given the GRR of the current genomic context is returned,
        which may be None.
        """
        if args.grr_filename is not None:
            logger.info(
                "Using the GRR configured in the file "
                "%s as requested on the "
                "command line.", args.grr_filename)
            return build_genomic_resource_repository(
                file_name=args.grr_filename)

        if args.grr_directory is not None:
            logger.info(
                "Using local GRR directory "
                "%s as requested on the "
                "command line.", args.grr_directory)
            return build_genomic_resource_repository({
                "id": "local",
                "type": "directory",
                "directory": args.grr_directory,
            })

        return get_genomic_context().get_context_object(GC_GRR_KEY)

    @staticmethod
    def context_builder(args: argparse.Namespace) -> CLIGenomicContext:
        """Build a CLI genomic context.

        The resulting context always holds a GRR. A reference genome and
        gene models are added only when their resource ids are given on the
        command line; both are looked up in the resolved GRR.

        Raises:
            ValueError: no GRR could be resolved.
        """
        grr = CLIGenomicContext._resolve_grr(args)
        if grr is None:
            raise ValueError("Can't resolve genomic context GRR")

        context_objects: dict[str, Any] = {GC_GRR_KEY: grr}

        if args.reference_genome_resource_id is not None:
            logger.info(
                "Using the reference genome from resource "
                "%s provided on the command line.",
                args.reference_genome_resource_id)
            resource = grr.get_resource(args.reference_genome_resource_id)
            genome = build_reference_genome_from_resource(resource)
            # The genome is stored already opened.
            genome.open()
            context_objects[GC_REFERENCE_GENOME_KEY] = genome

        if args.gene_models_resource_id is not None:
            logger.info(
                "Using the gene models from resource "
                "%s provided on the command line.",
                args.gene_models_resource_id)
            resource = grr.get_resource(args.gene_models_resource_id)
            gene_models = build_gene_models_from_resource(resource).load()
            context_objects[GC_GENE_MODELS_KEY] = gene_models

        return CLIGenomicContext(
            context_objects, source=("CLIGenomicContext", ))
class DefaultRepositoryContextProvider(SimpleGenomicContextProvider):
    """Genomic context provider for default GRR."""

    @staticmethod
    def context_builder() -> GenomicContext:
        """Build a context holding the default GRR.

        The GRR is created by calling ``build_genomic_resource_repository``
        without arguments; its ``repo_id`` becomes part of the source tuple.
        """
        default_grr = build_genomic_resource_repository()
        source = (
            "default_genomic_resources_repository",
            default_grr.repo_id,
        )
        return SimpleGenomicContext({GC_GRR_KEY: default_grr}, source)

    def __init__(self) -> None:
        super().__init__(
            context_builder=DefaultRepositoryContextProvider.context_builder,
            provider_type="DefaultGRRProvider",
            priority=1000,
        )

    @staticmethod
    def register() -> None:
        """Register a new instance of this provider."""
        provider = DefaultRepositoryContextProvider()
        register_context_provider(provider)