Source code for gain.genomic_resources.cli_cache_repo

"""CLI for caching genomic resources referenced by an annotation pipeline.

The tool resolves a `GenomicResourceRepo` and an `AnnotationPipeline`
from a combination of command-line flags and the registered genomic
context providers, then caches the resources the pipeline depends on.

The annotation pipeline can come from:
  - the positional ``pipeline`` argument — a file path or a GRR resource
    id of type ``annotation_pipeline``. This is supplied by the standard
    ``CLIAnnotationContextProvider`` (the same mechanism
    ``annotate_columns`` / ``annotate_vcf`` use).
  - ``-i / --instance`` — when the ``GPFInstanceContextProvider`` plugin
    is installed, this resolves to the pipeline of the configured GPF
    instance.

When both are supplied, the positional ``pipeline`` wins (it is supplied
by a higher-priority context provider); the tool logs which source it
used. When neither is supplied (positional omitted / left at its
``"context"`` sentinel and no instance pipeline available), the tool logs
a warning and exits cleanly without caching anything.
"""
import argparse
import logging
import sys
import time

from gain.annotation.annotate_utils import (
    build_cli_genomic_context,
    cache_pipeline_resources,
    get_grr_from_context,
)
from gain.annotation.annotation_genomic_context_cli import (
    get_context_pipeline,
)
from gain.genomic_resources.genomic_context import (
    context_providers_add_argparser_arguments,
)
from gain.utils.verbosity_configuration import VerbosityConfiguration

logger = logging.getLogger("grr_cache_repo")


[docs] def cli_cache_repo(argv: list[str] | None = None) -> None: """Cache genomic resources used by an annotation pipeline.""" if argv is None: argv = sys.argv[1:] parser = argparse.ArgumentParser( description="Cache the genomic resources used by an " "annotation pipeline.") parser.add_argument( "--jobs", "-j", type=int, default=4, help="Number of parallel workers fetching resources.") parser.add_argument( "--no-progress", dest="progress", action="store_false", help="Disable the progress indication (live bar on a terminal, " "milestone log lines otherwise).") context_providers_add_argparser_arguments(parser) VerbosityConfiguration.set_arguments(parser) args = parser.parse_args(argv) VerbosityConfiguration.set(args) context = build_cli_genomic_context(vars(args)) grr = get_grr_from_context(context) pipeline = get_context_pipeline(context) if pipeline is None: logger.warning("no pipeline supplied; nothing to cache") return if args.pipeline != "context": logger.info( "caching pipeline from positional arg %s", args.pipeline) else: logger.info("caching pipeline from gpf instance / genomic context") start = time.time() cache_pipeline_resources( grr, pipeline, workers=args.jobs, progress=args.progress) logger.info("cached pipeline resources in %.2f secs", time.time() - start)