import argparse
import logging
import sys
import time
from typing import cast
from dae.gpf_instance.gpf_instance import GPFInstance
from dae.studies.study import GenotypeDataGroup
from dae.utils.verbosity_configuration import VerbosityConfiguration
logger = logging.getLogger("generate_families_cache")
[docs]
def main(
    argv: list[str] | None = None,
    gpf_instance: GPFInstance | None = None,
) -> None:
    """Command line tool to create genotype groups families cache.

    Args:
        argv: Command line arguments to parse; when ``None`` the
            arguments are taken from ``sys.argv[1:]``.
        gpf_instance: GPF instance to work with; when ``None`` one is
            created with ``GPFInstance.build()``.

    With ``--show-groups`` the IDs of the available genotype groups are
    logged and nothing else is done. Otherwise the families cache is saved
    for every genotype group selected with ``--groups`` (a comma-separated
    list of IDs), or for all genotype groups when ``--groups`` is omitted.
    Requested IDs that are unknown to the instance, or that are not
    genotype groups, are skipped with a warning.
    """
    description = "Create genotype groups families cache"
    parser = argparse.ArgumentParser(description=description)
    VerbosityConfiguration.set_arguments(parser)
    parser.add_argument(
        "--show-groups",
        help="This option will print available "
        "genotype groups IDs",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--groups",
        help="Comma-separated list of genotype groups IDs "
        "for generating families cache. Defaults to all genotype groups.",
        default=None,
        action="store",
    )

    if argv is None:
        argv = sys.argv[1:]
    args = parser.parse_args(argv)
    VerbosityConfiguration.set(args)

    start = time.time()
    if gpf_instance is None:
        gpf_instance = GPFInstance.build()
    available_studies = gpf_instance.get_all_genotype_data()
    elapsed = time.time() - start
    logger.info(
        "GPF instance genotype data loaded %.2f sec", elapsed)

    if args.show_groups:
        for study in available_studies:
            if not study.is_group:
                continue
            # NOTE(review): logged at WARNING level, presumably so that the
            # listing is visible with the default verbosity -- confirm.
            logger.warning("genotype group: %s", study.study_id)
        return

    # Restart the clock so the final report covers cache generation only.
    start = time.time()
    studies = []
    if args.groups:
        # Tolerate whitespace around the commas and stray empty items,
        # e.g. "group_a, group_b," selects group_a and group_b.
        requested_ids = [
            group_id.strip()
            for group_id in args.groups.split(",")
            if group_id.strip()
        ]
        all_study_ids = set(gpf_instance.get_genotype_data_ids())
        for study_id in requested_ids:
            if study_id not in all_study_ids:
                logger.warning(
                    "study %s not found in GPF instance studies %s",
                    study_id, all_study_ids)
                continue
            study = gpf_instance.get_genotype_data(study_id)
            if not study.is_group:
                logger.warning(
                    "study %s is not a genotype data group; skipping",
                    study_id)
                continue
            studies.append(study)
        study_ids = {st.study_id for st in studies}
        logger.info("build families data cache for: %s", study_ids)
    else:
        studies = [study for study in available_studies if study.is_group]
        study_ids = {st.study_id for st in studies}
        logger.info(
            "build families data cache for all groups: %s!!!",
            study_ids)

    for study in studies:
        # Only genotype groups reach this point; the cast narrows the type
        # for static checkers and has no runtime effect.
        study_group = cast(GenotypeDataGroup, study)
        logger.info(
            "%s is a group, caching families...", study_group.study_id)
        # NOTE(review): families are rebuilt only when a cache already
        # exists -- presumably because in that case they were loaded from
        # the (possibly outdated) cache; confirm against GenotypeDataGroup.
        if study_group.has_families_cache():
            study_group.rebuild_families()
        study_group.save_families_cache()

    # Measure the cache generation itself; without this the report would
    # repeat the GPF instance loading time computed above.
    elapsed = time.time() - start
    logger.info(
        "generate families cache for %s genotype groups elapsed %.2f sec",
        study_ids, elapsed)