"""Command-line helpers for configuring genomic resource contexts.
This module exposes :class:`CLIGenomicContextProvider`, a concrete
implementation of
:class:`~dae.genomic_resources.genomic_context_base.GenomicContextProvider`
that resolves genomic resources based on command-line arguments. Tools can
register the provider to let their users supply a genomic resources
repository, reference genome, and gene models at runtime.
"""
import argparse
import logging
from typing import Any
from dae.genomic_resources.gene_models.gene_models_factory import (
build_gene_models_from_resource,
)
from dae.genomic_resources.reference_genome import (
build_reference_genome_from_resource,
)
from dae.genomic_resources.repository_factory import (
build_genomic_resource_repository,
)
from .genomic_context_base import (
GC_GENE_MODELS_KEY,
GC_GRR_KEY,
GC_REFERENCE_GENOME_KEY,
GenomicContext,
GenomicContextProvider,
SimpleGenomicContext,
)
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
class CLIGenomicContextProvider(GenomicContextProvider):
    """Resolve genomic resources from command-line arguments.

    The provider allows CLI tools to override the default genomic resources
    repository, reference genome, and gene models. When invoked without any
    overrides, it falls back to the previously initialised genomic context so
    that defaults from ``gpf_instance`` or other providers remain available.
    """

    def __init__(
        self,
    ) -> None:
        """Initialise the provider with its identifier and priority."""
        super().__init__(
            "CLIGenomicContextProvider",
            900,
        )

    def add_argparser_arguments(
        self, parser: argparse.ArgumentParser,
    ) -> None:
        """Expose CLI options that control genomic resource resolution.

        Parameters
        ----------
        parser
            The argument parser that should receive the provider specific
            options.
        """
        parser.add_argument(
            "-g", "--grr-filename", "--grr", default=None,
            help="The GRR configuration file. If the argument is absent, "
            "a GRR repository from the current genomic context "
            "(i.e. gpf_instance) will be used or, if that fails, the "
            "default GRR configuration will be used.")
        parser.add_argument(
            "--grr-directory", default=None,
            help="Local GRR directory to use as repository.")
        parser.add_argument(
            "-R", "--reference-genome-resource-id", "--ref", default=None,
            help="The resource id for the reference genome. If the argument "
            "is absent the reference genome from the current genomic "
            "context will be used.")
        parser.add_argument(
            "-G", "--gene-models-resource-id", "--genes", default=None,
            help="The resource id of the gene models resource. If the "
            "argument is absent the gene models from the current genomic "
            "context will be used.")

    @staticmethod
    def _resolve_grr(kwargs: dict[str, Any]) -> Any:
        """Pick the genomic resources repository requested by ``kwargs``.

        Precedence: ``grr_filename`` first, then ``grr_directory``, and
        finally the GRR registered in the current genomic context. Returns
        whatever that last lookup yields (possibly ``None``) when neither
        command-line override is present.
        """
        # Imported lazily, as in the original code, to avoid importing the
        # genomic context module at module load time.
        # pylint: disable=import-outside-toplevel
        from .genomic_context import (
            get_genomic_context,
        )

        grr_filename = kwargs.get("grr_filename")
        grr_directory = kwargs.get("grr_directory")

        if grr_filename is not None:
            if grr_directory is not None:
                # Previously the directory was dropped without any notice.
                logger.warning(
                    "Both a GRR configuration file (%s) and a local GRR "
                    "directory (%s) were requested; the configuration file "
                    "takes precedence and the directory is ignored.",
                    grr_filename, grr_directory)
            logger.info(
                "Using the GRR configured in the file "
                "%s as requested on the "
                "command line.", grr_filename)
            return build_genomic_resource_repository(
                file_name=grr_filename)

        if grr_directory is not None:
            logger.info(
                "Using local GRR directory "
                "%s as requested on the "
                "command line.", grr_directory)
            return build_genomic_resource_repository({
                "id": "local",
                "type": "directory",
                "directory": grr_directory,
            })

        return get_genomic_context().get_context_object(GC_GRR_KEY)

    def init(self, **kwargs: Any) -> GenomicContext | None:
        """Create a :class:`SimpleGenomicContext` based on CLI arguments.

        Parameters
        ----------
        **kwargs
            Arguments produced from the command-line parser. The provider
            recognises ``grr_filename``, ``grr_directory``,
            ``reference_genome_resource_id``, and ``gene_models_resource_id``.
            If both ``grr_filename`` and ``grr_directory`` are given, the
            file wins and a warning is logged.

        Returns
        -------
        GenomicContext | None
            A context containing the resolved objects, or ``None`` if the
            genomic resources repository could not be determined.
        """
        grr = self._resolve_grr(kwargs)
        if grr is None:
            logger.info(
                "no grr provided in the genomic context; unable to "
                "resolve CLI genomic context")
            return None

        context_objects: dict[str, Any] = {GC_GRR_KEY: grr}

        genome_id = kwargs.get("reference_genome_resource_id")
        if genome_id is not None:
            logger.info(
                "Using the reference genome from resource "
                "%s provided on the command line.",
                genome_id)
            genome = build_reference_genome_from_resource(
                grr.get_resource(genome_id))
            # NOTE(review): the genome is opened here and not closed by this
            # provider - presumably the consumer of the context closes it;
            # confirm.
            genome.open()
            context_objects[GC_REFERENCE_GENOME_KEY] = genome

        gene_models_id = kwargs.get("gene_models_resource_id")
        if gene_models_id is not None:
            logger.info(
                "Using the gene models from resource "
                "%s provided on the command line.",
                gene_models_id)
            gene_models = build_gene_models_from_resource(
                grr.get_resource(gene_models_id)).load()
            context_objects[GC_GENE_MODELS_KEY] = gene_models

        return SimpleGenomicContext(
            context_objects, source="CLIGenomicContextProvider")