Source code for dae.genomic_resources.genomic_context_base

"""Base classes and interfaces for genomic context management.

This module defines the foundational abstractions for organizing and
accessing genomic resources through a unified context system.  The central
concept is :class:`GenomicContext`, which acts as a key-value store
exposing resources like genomic repositories, reference genomes, gene
models, and annotation pipelines.  Providers implementing
:class:`GenomicContextProvider` are responsible for building concrete
context instances, often by consulting configuration files or command-line
arguments.

The module also provides two concrete context implementations:
:class:`SimpleGenomicContext` for straightforward dictionary-backed contexts
and :class:`PriorityGenomicContext` for merging multiple contexts with
fallback semantics.

Key Constants
-------------
GC_GRR_KEY : str
    Standard key for the genomic resources repository object.
GC_REFERENCE_GENOME_KEY : str
    Standard key for the reference genome object.
GC_GENE_MODELS_KEY : str
    Standard key for the gene models object.
GC_ANNOTATION_PIPELINE_KEY : str
    Standard key for the annotation pipeline object.

See Also
--------
dae.genomic_resources.genomic_context
    High-level orchestration and provider registration functions.
"""
from __future__ import annotations

import argparse
import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any

from dae.genomic_resources.gene_models.gene_models import (
    GeneModels,
)
from dae.genomic_resources.reference_genome import (
    ReferenceGenome,
)
from dae.genomic_resources.repository import GenomicResourceRepo

# Module-level logger named after this module (``__name__``).
logger = logging.getLogger(__name__)
# Standard keys under which a genomic context exposes its resources.
# Key for the genomic resources repository object.
GC_GRR_KEY = "genomic_resources_repository"
# Key for the reference genome object.
GC_REFERENCE_GENOME_KEY = "reference_genome"
# Key for the gene models object.
GC_GENE_MODELS_KEY = "gene_models"
# Key for the annotation pipeline object.
GC_ANNOTATION_PIPELINE_KEY = "annotation_pipeline"


class GenomicContext(ABC):
    """Registry interface that hands out genomic resources by string key.

    A genomic context exposes objects such as a genomic resources
    repository, a reference genome, gene models or an annotation pipeline
    under well-known string keys.  Concrete subclasses decide how the
    objects are stored by implementing :meth:`get_context_object`,
    :meth:`get_context_keys` and :meth:`get_source`.

    Notes
    -----
    Three typed convenience accessors are built on top of
    :meth:`get_context_object`: :meth:`get_reference_genome`,
    :meth:`get_gene_models` and :meth:`get_genomic_resources_repository`.
    Each one passes ``None`` through unchanged, returns the object when it
    has the expected type, and raises :exc:`ValueError` otherwise.
    """

    def get_reference_genome(self) -> ReferenceGenome | None:
        """Return the reference genome held by this context, if any.

        Returns
        -------
        ReferenceGenome | None
            The object stored under :const:`GC_REFERENCE_GENOME_KEY`, or
            ``None`` when the lookup yields ``None``.

        Raises
        ------
        ValueError
            If the stored object is not a :class:`ReferenceGenome`.
        """
        genome = self.get_context_object(GC_REFERENCE_GENOME_KEY)
        # ``None`` (nothing stored) and a correctly typed object are both
        # handed back unchanged; anything else is a misconfigured context.
        if genome is None or isinstance(genome, ReferenceGenome):
            return genome
        raise ValueError(
            "The context returned a wrong type for a reference genome: "
            f"{type(genome)}")

    def get_gene_models(self) -> GeneModels | None:
        """Return the gene models held by this context, if any.

        Returns
        -------
        GeneModels | None
            The object stored under :const:`GC_GENE_MODELS_KEY`, or
            ``None`` when the lookup yields ``None``.

        Raises
        ------
        ValueError
            If the stored object is not a :class:`GeneModels`.
        """
        gene_models = self.get_context_object(GC_GENE_MODELS_KEY)
        if gene_models is None or isinstance(gene_models, GeneModels):
            return gene_models
        raise ValueError(
            "The context returned a wrong type for gene models: "
            f"{type(gene_models)}")

    def get_genomic_resources_repository(
        self,
    ) -> GenomicResourceRepo | None:
        """Return the genomic resources repository of this context, if any.

        Returns
        -------
        GenomicResourceRepo | None
            The object stored under :const:`GC_GRR_KEY`, or ``None`` when
            the lookup yields ``None``.

        Raises
        ------
        ValueError
            If the stored object is not a :class:`GenomicResourceRepo`.
        """
        grr = self.get_context_object(GC_GRR_KEY)
        if grr is None or isinstance(grr, GenomicResourceRepo):
            return grr
        raise ValueError(
            "The context returned a wrong type for GRR: "
            f"{type(grr)}")

    @abstractmethod
    def get_context_object(self, key: str) -> Any | None:
        """Look up the object registered under ``key``.

        Parameters
        ----------
        key
            String identifier of the requested resource.

        Returns
        -------
        Any | None
            The stored object, or ``None`` when the key is not present.

        Notes
        -----
        Implementations are expected to signal a missing key by returning
        ``None`` instead of raising :exc:`KeyError`, so that callers can
        probe for optional resources safely.
        """

    @abstractmethod
    def get_context_keys(self) -> set[str]:
        """List every key this context can serve.

        Returns
        -------
        set[str]
            All keys under which objects can be retrieved; possibly empty.
        """

    @abstractmethod
    def get_source(self) -> str:
        """Describe where this context comes from.

        Returns
        -------
        str
            A human-readable label (for example a provider name or a file
            path), handy for logging when several contexts are combined.
        """
class GenomicContextProvider(ABC):
    """Factory interface for objects that build genomic contexts.

    A provider produces a :class:`GenomicContext` from external
    configuration such as configuration files, command-line arguments or
    environment settings.  Every provider carries a unique type name and a
    numeric priority that governs the order in which providers are
    consulted while the context is being initialised.

    Providers typically register themselves at import time through
    :func:`dae.genomic_resources.genomic_context.register_context_provider`.
    The registration system then orders providers by priority (descending)
    and type name before calling :meth:`init` on each of them.

    Attributes
    ----------
    _provider_type : str
        Unique identifier describing this provider.
    _provider_priority : int
        Numeric priority; higher values are consulted first.
    """

    def __init__(self, provider_type: str, provider_priority: int) -> None:
        """Store the provider's type name and priority.

        Parameters
        ----------
        provider_type
            Unique string label of the provider, used for logging and
            sorting.
        provider_priority
            Numeric priority controlling invocation order; providers with
            higher values are initialised before those with lower values.
        """
        self._provider_type, self._provider_priority = (
            provider_type,
            provider_priority,
        )

    def get_context_provider_priority(self) -> int:
        """Return the numeric priority given at construction time.

        Returns
        -------
        int
            The provider's priority.
        """
        return self._provider_priority

    def get_context_provider_type(self) -> str:
        """Return the type identifier given at construction time.

        Returns
        -------
        str
            The provider's unique type name.
        """
        return self._provider_type

    @abstractmethod
    def add_argparser_arguments(
        self,
        parser: argparse.ArgumentParser,
    ) -> None:
        """Add the provider's command-line options to ``parser``.

        Parameters
        ----------
        parser
            The :class:`argparse.ArgumentParser` that should receive the
            additional arguments.

        Notes
        -----
        Arguments may be optional or required.  The parsed namespace is
        later handed to :meth:`init` as keyword arguments.  A provider
        without CLI options should leave the parser untouched.
        """

    @abstractmethod
    def init(self, **kwargs: Any) -> GenomicContext | None:
        """Assemble a genomic context from the supplied configuration.

        Parameters
        ----------
        **kwargs
            Configuration values, typically derived from command-line
            parsing, environment variables or configuration files.  The
            accepted keys depend on what the provider declared in
            :meth:`add_argparser_arguments`.

        Returns
        -------
        GenomicContext | None
            A new context when the provider could assemble its resources,
            or ``None`` when it chooses to abstain (for instance because
            optional arguments were omitted).

        Notes
        -----
        Returning ``None`` lets a provider participate conditionally so
        that other providers can supply default or fallback contexts.
        """
class SimpleGenomicContext(GenomicContext):
    """Genomic context backed by a plain dictionary.

    The context keeps the resource objects it is given in a dictionary and
    serves them on request.  It suits providers that assemble a fixed set
    of resources at initialisation time.

    Parameters
    ----------
    context_objects
        Mapping from string keys to resource objects.  Typical keys are
        :const:`GC_GRR_KEY`, :const:`GC_REFERENCE_GENOME_KEY`,
        :const:`GC_GENE_MODELS_KEY` and
        :const:`GC_ANNOTATION_PIPELINE_KEY`.  The dictionary is stored by
        reference, not copied.
    source
        Human-readable label naming the origin of this context, such as a
        provider name or a file path.

    Attributes
    ----------
    _context : dict[str, Any]
        The dictionary holding the resource objects.
    _source : str
        The stored source label.
    """

    def __init__(
        self,
        context_objects: dict[str, Any],
        source: str,
    ):
        self._source = source
        self._context: dict[str, Any] = context_objects

    def get_context_object(self, key: str) -> Any | None:
        """Fetch the resource stored under ``key``.

        Parameters
        ----------
        key
            String identifier of the requested resource.

        Returns
        -------
        Any | None
            The stored object, or ``None`` when the key is unknown.
        """
        return self._context.get(key)

    def get_context_keys(self) -> set[str]:
        """Return the keys of all stored resources.

        Returns
        -------
        set[str]
            A new set containing every key of the backing dictionary.
        """
        return set(self._context)

    def get_source(self) -> str:
        """Return the label describing this context's origin.

        Returns
        -------
        str
            The source label passed to the constructor.
        """
        return self._source
class PriorityGenomicContext(GenomicContext):
    """Composite context implementing priority-based fallback lookup.

    The context wraps several underlying contexts and consults them in
    order whenever a resource is requested.  The first context that
    returns a non-``None`` value for a key wins, which lets CLI or
    user-supplied contexts override defaults coming from
    configuration-driven providers.

    Parameters
    ----------
    contexts
        An iterable of :class:`GenomicContext` instances ordered by
        descending precedence.  Any iterable is accepted, including a
        generator; it is copied into a list at construction time.

    Attributes
    ----------
    contexts : list[GenomicContext]
        The ordered underlying contexts (a snapshot of the iterable passed
        to the constructor).

    Notes
    -----
    At construction time the sources of all constituent contexts are
    logged at debug level to aid debugging.  When no contexts are given a
    debug message states that no genomic contexts are available.
    """

    def __init__(self, contexts: Iterable[GenomicContext]):
        # Snapshot the iterable.  A generator is always truthy and can be
        # walked only once, so storing it as-is would make the emptiness
        # check below meaningless and leave nothing for later lookups once
        # the logging loop has consumed it.
        self.contexts = list(contexts)
        if self.contexts:
            logger.debug("Using the following genomic context:")
            for context in self.contexts:
                logger.debug("\t%s", context.get_source())
        else:
            logger.debug("No genomic contexts are available.")

    def get_context_object(self, key: str) -> Any | None:
        """Retrieve a resource using priority-based fallback.

        Parameters
        ----------
        key
            The string identifier of the desired resource.

        Returns
        -------
        Any | None
            The first non-``None`` object found among the underlying
            contexts, or ``None`` if every context returns ``None`` (or if
            there are no contexts).

        Notes
        -----
        Contexts are queried in order and the search stops at the first
        hit.  An info-level log entry records which context supplied the
        object.
        """
        for context in self.contexts:
            obj = context.get_context_object(key)
            # Compare against ``None`` explicitly: a present resource may
            # legitimately be falsy (e.g. an empty container-like object)
            # and must not be skipped in favour of a lower-priority one.
            if obj is not None:
                logger.info(
                    "object with key %s found in the context %s",
                    key, context.get_source())
                return obj
        return None

    def get_context_keys(self) -> set[str]:
        """Compute the union of all keys from the underlying contexts.

        Returns
        -------
        set[str]
            The merged set of keys available across all constituent
            contexts; a key exposed by several contexts appears once.
        """
        result: set[str] = set()
        for context in self.contexts:
            result.update(context.get_context_keys())
        return result

    def get_source(self) -> str:
        """Generate a composite source identifier.

        Returns
        -------
        str
            A string of the form
            ``"PriorityGenomicContext(source1|source2|...)"`` listing the
            sources of all underlying contexts in priority order.
        """
        sources = "|".join(
            str(context.get_source()) for context in self.contexts)
        return f"PriorityGenomicContext({sources})"