Source code for dae.genomic_resources.genomic_context

"""
Genomic context provides a way to collect various genomic resources from
various sources and make them available through a single interface.

The module follows a registry-based approach.  Providers register themselves
and are later consulted (in priority order) to build individual
:class:`~dae.genomic_resources.genomic_context_base.GenomicContext`
instances.  Every created context is combined into a
:class:`PriorityGenomicContext`, offering a single access point for
resources such as genomic resource repositories, reference genomes,
gene models, annotation pipelines, etc. Providers can be registered
programmatically via :func:`register_context_provider` or discovered
automatically through entry points.

Example usage of genomic context in a tool with command line interface:

.. code-block:: python

    import argparse
    import sys

    from dae.genomic_resources.genomic_context import (
        context_providers_add_argparser_arguments,
        context_providers_init,
        get_genomic_context,
    )


    parser = argparse.ArgumentParser()
    context_providers_add_argparser_arguments(parser)

    args = parser.parse_args(sys.argv[1:])
    context_providers_init(**vars(args))
    genomic_context = get_genomic_context()

If you don't need command line arguments you can do:

.. code-block:: python

    context_providers_init()
    genomic_context = get_genomic_context()

When you need a CLI with all defaults and without modifying the argument
parser you can do:

.. code-block:: python

    context_providers_init_with_argparser("GenomicTool")
    genomic_context = get_genomic_context()


"""
from __future__ import annotations

import argparse
import logging
import sys
from typing import Any

from dae.genomic_resources.repository_factory import (
    build_genomic_resource_repository,
)

from .genomic_context_base import (
    GenomicContext,
    GenomicContextProvider,
    PriorityGenomicContext,
    SimpleGenomicContext,
)

logger = logging.getLogger(__name__)


[docs] class DefaultRepositoryContextProvider(GenomicContextProvider): """Provide access to the default genomic resources repository. The default repository is resolved via :func:`build_genomic_resource_repository` using the environment configuration. The resulting context exposes a single key, ``"genomic_resources_repository"``, which can be consumed by other code participating in the context chain. """ def __init__(self) -> None: super().__init__( "DefaultGRRProvider", 10_000)
[docs] def add_argparser_arguments( self, parser: argparse.ArgumentParser, ) -> None: """Declare command line arguments for this provider. The default repository provider is fully configuration driven and has nothing to expose on the CLI, so the method intentionally leaves the *parser* untouched. The override exists to make the behaviour explicit in the generated documentation. """
[docs] def init( self, **kwargs: Any, # noqa: ARG002 ) -> GenomicContext: """Instantiate a context backed by the default GRR. Parameters ---------- **kwargs Accepted for interface compatibility; the provider ignores runtime keyword arguments because everything is derived from the global configuration. Returns ------- GenomicContext A context exposing a single ``genomic_resources_repository`` entry pointing at the default repository instance. """ grr = build_genomic_resource_repository() return SimpleGenomicContext( { "genomic_resources_repository": grr, }, source=f"DefaultRepositoryContextProvider({grr.repo_id})", )
_REGISTERED_CONTEXT_PROVIDERS: list[GenomicContextProvider] = [] """Collection of provider instances awaiting initialisation.""" _REGISTERED_CONTEXTS: list[GenomicContext] = [] """Contexts produced during :func:`context_providers_init`."""
[docs] def register_context_provider( context_provider: GenomicContextProvider, ) -> None: """Register *context_provider* so it participates in initialization. Parameters ---------- context_provider The provider implementation that should be considered when contexts are assembled. Providers are stored in registration order and later sorted by their priority before initialization. """ logger.debug( "Registering the %s " "genomic context generator with priority %s", context_provider.get_context_provider_type(), context_provider.get_context_provider_priority()) _REGISTERED_CONTEXT_PROVIDERS.append(context_provider)
[docs] def context_providers_init(**kwargs: Any) -> None: """Materialize contexts from every registered provider. The function walks all registered providers in priority order and asks each of them to initialise a :class:`GenomicContext`. The resulting contexts are stored for later retrieval via :func:`get_genomic_context`. Notes ----- Providers are invoked at most once per process. Subsequent calls are ignored until :func:`clear_registered_contexts` is executed, which is especially helpful in unit tests. Parameters ---------- **kwargs Keyword arguments forwarded to every provider's ``init`` method. """ if _REGISTERED_CONTEXTS: logger.debug( "Genomic context providers already initialized, skipping.") return for provider in sorted( _REGISTERED_CONTEXT_PROVIDERS, key=lambda g: (- g.get_context_provider_priority(), g.get_context_provider_type())): context = provider.init(**kwargs) if context is None: logger.info( "genomic context provider %s unable to create a context", provider.get_context_provider_type(), ) continue register_context(context)
[docs] def context_providers_init_with_argparser( toolname: str = "GenomicTool", ) -> None: """Initialise providers using arguments parsed from ``sys.argv``. Parameters ---------- toolname The program name presented to :class:`argparse.ArgumentParser`. Notes ----- This helper is useful for simple tools that do not customise their argument parser but still want to expose the command line options defined by registered context providers. """ parser = argparse.ArgumentParser(prog=toolname) context_providers_add_argparser_arguments(parser) args = parser.parse_args(sys.argv[1:]) context_providers_init(**vars(args))
[docs] def clear_registered_contexts() -> None: """Forget all contexts created by :func:`context_providers_init`. This function exists primarily for testing scenarios where the global registry should be reset between test cases. """ _REGISTERED_CONTEXTS.clear()
[docs] def context_providers_add_argparser_arguments( parser: argparse.ArgumentParser, ) -> None: """Delegate command line argument registration to each provider. Parameters ---------- parser The parser that should receive additional arguments from every registered provider. """ for provider in sorted( _REGISTERED_CONTEXT_PROVIDERS, key=lambda g: (- g.get_context_provider_priority(), g.get_context_provider_type())): provider.add_argparser_arguments(parser)
[docs] def register_context(context: GenomicContext) -> None: """Record *context* so it participates in future lookups. Parameters ---------- context The context instance to be considered when :func:`get_genomic_context` is invoked. """ logger.debug( "registering the %s " "genomic context", context.get_source()) _REGISTERED_CONTEXTS.insert(0, context)
[docs] def get_genomic_context() -> GenomicContext: """Return a priority context that merges every registered context. The returned :class:`PriorityGenomicContext` respects the registration order, giving precedence to contexts added most recently when multiple contexts expose the same key. """ contexts = _REGISTERED_CONTEXTS[:] return PriorityGenomicContext(contexts)
def _load_genomic_context_provider_plugins() -> None: """Discover context providers published via ``entry_points``. The function is executed at import time so the default providers become available automatically. Every entry point is expected to expose a callable returning a :class:`GenomicContextProvider` instance. Notes ----- Third-party packages can contribute providers by declaring the ``dae.genomic_resources.plugins`` entry-point group in their project metadata (for example, ``pyproject.toml``). """ # pylint: disable=import-outside-toplevel from importlib.metadata import entry_points discovered_plugins = entry_points(group="dae.genomic_resources.plugins") for plugin in discovered_plugins: factory = plugin.load() provider = factory() assert isinstance(provider, GenomicContextProvider) register_context_provider(provider) _load_genomic_context_provider_plugins()