import glob
import logging
import os
import shutil
import toml
from box import Box
logger = logging.getLogger(__name__)
[docs]
class DatasetHelpers:
    """Helper class for work with studies in impala genotype storage.

    Wraps a GPF instance and provides lookups of a dataset's configuration
    file, its parsed content and its genotype storage, plus filesystem
    operations (rename, remove, disable) on the dataset's configuration
    directory.
    """

    def __init__(self, gpf_instance=None):
        """Store the GPF instance the helpers operate on.

        :param gpf_instance: the instance to use; when ``None`` a new one
            is created with ``GPFInstance.build()``.
        """
        if gpf_instance is None:
            # The import is performed only when no instance is supplied.
            # pylint: disable=import-outside-toplevel
            from dae.gpf_instance.gpf_instance import GPFInstance
            gpf_instance = GPFInstance.build()
        self.gpf_instance = gpf_instance

    def find_genotype_data_config_file(self, dataset_id):
        """Find and return config filename for a dataset.

        The dataset's configuration is requested from the GPF instance; if
        it is not found, the variants DB is reloaded and the lookup is
        retried once.

        :param dataset_id: ID of the study or dataset to look up.
        :return: path of the single ``*.conf`` file located in the
            dataset's ``conf_dir``, or ``None`` when the GPF instance has
            no configuration for ``dataset_id`` even after the reload.
        :raises AssertionError: when ``conf_dir`` does not contain exactly
            one ``*.conf`` file, or the matched path does not exist.
        """
        config = self.gpf_instance.get_genotype_data_config(dataset_id)
        if config is None:
            # pylint: disable=protected-access
            self.gpf_instance._variants_db.reload()
            config = self.gpf_instance.get_genotype_data_config(dataset_id)
            if config is None:
                return None

        conf_dir = config.conf_dir
        result = glob.glob(os.path.join(conf_dir, "*.conf"))
        assert len(result) == 1, \
            f"unexpected number of config files in {conf_dir}"
        config_file = result[0]
        assert os.path.exists(config_file)
        return config_file

    def _require_config_file(self, dataset_id):
        """Return the dataset's config filename or raise when not found.

        :raises ValueError: when no configuration file can be located for
            ``dataset_id``.
        """
        config_file = self.find_genotype_data_config_file(dataset_id)
        if config_file is None:
            raise ValueError(
                f"unable to find config file for dataset <{dataset_id}>")
        return config_file

    def find_genotype_data_config(self, dataset_id):
        """Find and return configuration of a dataset.

        :param dataset_id: ID of the study or dataset to look up.
        :return: the content of the dataset's config file parsed as TOML
            and wrapped in a ``Box``, or ``None`` when no config file is
            found.
        """
        config_file = self.find_genotype_data_config_file(dataset_id)
        if config_file is None:
            return None

        # TOML documents are UTF-8 by specification; do not rely on the
        # locale's default encoding.
        with open(config_file, "r", encoding="utf-8") as infile:
            short_config = toml.load(infile)
        return Box(short_config)

    def get_genotype_storage(self, dataset_id):
        """Find the genotype storage that stores a dataset.

        :param dataset_id: ID of the study or dataset to look up.
        :return: the storage registered in the GPF instance under the
            ``genotype_storage.id`` value of the dataset's config file, or
            ``None`` when the dataset's configuration is not found.
        """
        config = self.find_genotype_data_config(dataset_id)
        if config is None:
            return None

        storage_id = config.genotype_storage.id
        return self.gpf_instance.genotype_storages.get_genotype_storage(
            storage_id)

    def rename_study_config(
            self, dataset_id, new_id, config_content, dry_run=None):
        """Rename study config for a dataset.

        The existing config file gets a ``_bak`` suffix, the config
        directory is renamed to ``new_id`` (inside the same parent
        directory) and a new ``<new_id>.conf`` file holding
        ``config_content`` is written into it.

        :param dataset_id: current ID of the study or dataset.
        :param new_id: new ID; used as the name of the config directory
            and of the new config file.
        :param config_content: content of the new config file; it is
            serialized with ``toml.dumps``.
        :param dry_run: when truthy, the steps are only logged and nothing
            is changed on the filesystem.
        :raises ValueError: when no config file is found for
            ``dataset_id``.
        """
        config_file = self._require_config_file(dataset_id)

        logger.info("going to disable config file %s", config_file)
        if not dry_run:
            os.rename(config_file, f"{config_file}_bak")

        config_dirname = os.path.dirname(config_file)
        new_dirname = os.path.join(os.path.dirname(config_dirname), new_id)
        logger.info(
            "going to rename config directory %s to %s",
            config_dirname, new_dirname)
        if not dry_run:
            os.rename(config_dirname, new_dirname)

        new_config_file = os.path.join(new_dirname, f"{new_id}.conf")
        logger.info("going to create a new config file %s", new_config_file)
        if not dry_run:
            # Serialize before opening the file so that a serialization
            # failure does not leave an empty config file behind.
            content = toml.dumps(config_content)
            with open(new_config_file, "wt", encoding="utf-8") as outfile:
                outfile.write(content)

    def remove_study_config(self, dataset_id):
        """Remove the directory holding the config file of a dataset.

        :param dataset_id: ID of the study or dataset to remove.
        :raises ValueError: when no config file is found for
            ``dataset_id``.
        """
        config_file = self._require_config_file(dataset_id)
        config_dir = os.path.dirname(config_file)
        shutil.rmtree(config_dir)

    def disable_study_config(self, dataset_id, dry_run=None):
        """Disable dataset.

        Both the config file and the directory that contains it are
        renamed by appending a ``_bak`` suffix.

        :param dataset_id: ID of the study or dataset to disable.
        :param dry_run: when truthy, the action is only logged and nothing
            is renamed.
        :raises ValueError: when no config file is found for
            ``dataset_id``.
        """
        config_file = self._require_config_file(dataset_id)
        config_dir = os.path.dirname(config_file)

        logger.info("going to disable study_config %s", config_file)
        if not dry_run:
            # The file is renamed first, while its original path is valid.
            os.rename(config_file, f"{config_file}_bak")
            os.rename(config_dir, f"{config_dir}_bak")