Source code for dae.enrichment_tool.enrichment_cache_builder

import argparse
import logging

from dae.enrichment_tool.enrichment_helper import EnrichmentHelper
from dae.gpf_instance import GPFInstance
from dae.utils.verbosity_configuration import VerbosityConfiguration

# Module-level logger; registered under a fixed name instead of the module's
# import path.
logger = logging.getLogger("enrichment_cache_builder")


def _parse_study_ids(raw: str) -> list[str]:
    """Split a comma-separated ``--studies`` value into clean study IDs.

    Whitespace around each ID is stripped and empty entries (for example
    those produced by a trailing comma) are dropped.
    """
    return [part.strip() for part in raw.split(",") if part.strip()]


def cli(
    argv: list[str] | None = None,
    gpf_instance: GPFInstance | None = None,
) -> None:
    """Generate enrichment tool cache.

    Args:
        argv: Command line arguments handed to ``argparse``. ``None`` lets
            ``argparse`` fall back to ``sys.argv[1:]``.
        gpf_instance: GPF instance to work on. When ``None``, one is created
            with ``GPFInstance.build()``.

    With ``--show-studies`` the IDs of all genotype data that have an
    enrichment configuration are printed and nothing is built. Otherwise the
    enrichment event counts cache is built for every requested study
    (``--studies``, comma-separated; all genotype data IDs by default) that
    has an enrichment configuration.
    """
    description = "Generate enrichment tool cache"
    parser = argparse.ArgumentParser(description=description)

    VerbosityConfiguration.set_arguments(parser)
    parser.add_argument(
        "--show-studies",
        help="This option will print available "
        "genotype studies and groups names",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--studies",
        help="Specify genotype studies and groups "
        "names for generating enrichment cache. Default to all.",
        default=None,
        action="store",
    )

    args = parser.parse_args(argv)

    VerbosityConfiguration.set(args)
    # Cap the "impala" logger at WARNING regardless of the chosen verbosity.
    logging.getLogger("impala").setLevel(logging.WARNING)

    if gpf_instance is None:
        gpf_instance = GPFInstance.build()

    if args.show_studies:
        for study in gpf_instance.get_all_genotype_data():
            if EnrichmentHelper.has_enrichment_config(study):
                print(study.study_id)
        return

    if args.studies:
        study_ids = _parse_study_ids(args.studies)
    else:
        study_ids = gpf_instance.get_genotype_data_ids()

    filtered_studies = []
    for study_id in study_ids:
        study = gpf_instance.get_genotype_data(study_id)
        if study is None:
            # NOTE(review): assumes an unknown ID yields None from
            # get_genotype_data; if it raises instead, this guard is inert.
            logger.warning(
                "genotype data %s not found; skipping", study_id)
            continue
        if EnrichmentHelper.has_enrichment_config(study):
            filtered_studies.append(study)

    # Logged at WARNING level, as in the original implementation.
    logger.warning(
        "generating enrichment cache for studies: %s",
        [st.study_id for st in filtered_studies])

    enrichment_helper = EnrichmentHelper(gpf_instance.grr)
    for study in filtered_studies:
        logger.info(
            "building enrichment cache for study %s", study.study_id)
        enrichment_config = enrichment_helper.get_enrichment_config(study)
        # Internal invariant: the study already passed
        # has_enrichment_config above.
        assert enrichment_config is not None
        # The cache is built for the first selected person set collection.
        psc_id = enrichment_config["selected_person_set_collections"][0]
        enrichment_helper.build_enrichment_event_counts_cache(
            study, psc_id)