# pylint: disable=W0621,C0114,C0116,W0212,W0613
import pathlib
import textwrap
from typing import Any, cast
import pytest
from gpf_instance.gpf_instance import WGPFInstance
from dae.genotype_storage.genotype_storage_registry import (
GenotypeStorageRegistry,
)
from dae.gpf_instance import GPFInstance
from dae.person_sets import PSCQuery
from dae.testing import setup_dataset, setup_pedigree, setup_vcf, vcf_study
from dae.testing.acgt_import import acgt_gpf
from studies.query_transformer import QueryTransformer, make_query_transformer
from studies.response_transformer import (
ResponseTransformer,
make_response_transformer,
)
from studies.study_wrapper import WDAEStudy, WDAEStudyGroup
# Module-level registry shared by every parametrization of ``gpf_fixture``;
# that fixture registers the storage configs only while the registry is empty.
GENOTYPE_STORAGE_REGISTRY = GenotypeStorageRegistry()
[docs]
@pytest.fixture(scope="module", params=["duckdb", "inmemory"])
def gpf_fixture(
    request: pytest.FixtureRequest,
    tmp_path_factory: pytest.TempPathFactory,
) -> GPFInstance:
    """Build an ACGT GPF instance backed by the parametrized storage.

    The fixture is module-scoped and runs once for each storage id listed
    in ``params``. Storage configs are registered in
    ``GENOTYPE_STORAGE_REGISTRY`` only while the registry reports no storage
    ids, so later parametrizations reuse what the first one registered.
    """
    storages_root = tmp_path_factory.mktemp(
        "study_group_person_set_queries_genotype_storages")
    storage_configs: dict[str, dict[str, Any]] = {
        # DuckDb Storage
        "duckdb": {
            "id": "duckdb",
            "storage_type": "duckdb_parquet",
            "base_dir": str(storages_root),
        },
        # Filesystem InMemory
        "inmemory": {
            "id": "inmemory",
            "storage_type": "inmemory",
            "dir": f"{storages_root}/genotype_filesystem_data",
        },
    }

    registry = GENOTYPE_STORAGE_REGISTRY
    if not registry.get_all_genotype_storage_ids():
        for config in storage_configs.values():
            registry.register_storage_config(config)

    storage = registry.get_genotype_storage(request.param)
    assert storage is not None

    # The GPF instance gets its own temp directory, separate from storages.
    instance_root = tmp_path_factory.mktemp("study_group_person_set_queries")
    return acgt_gpf(instance_root, storage=storage)
[docs]
@pytest.fixture(scope="module")
def wgpf_fixture(
    gpf_fixture: GPFInstance,
) -> WGPFInstance:
    """Build a ``WGPFInstance`` from the config of ``gpf_fixture``.

    The instance is loaded from ``gpf_instance.yaml`` inside the DAE
    directory of ``gpf_fixture`` and reuses that instance's GRR.
    """
    config_path = pathlib.Path(gpf_fixture.dae_dir) / "gpf_instance.yaml"
    return WGPFInstance.build(str(config_path), grr=gpf_fixture.grr)
[docs]
@pytest.fixture(scope="module")
def study_1(wgpf_fixture: WGPFInstance) -> WDAEStudy:
    """Import ``study_1`` from a VCF and wrap it in a ``WDAEStudy``.

    The study has two trios (families ``f1.1`` and ``f1.3``) and three
    variants on ``chr1``. Its ``phenotype`` person set collection maps the
    pedigree status ``affected`` to the ``autism`` person set.
    """
    study_dir = pathlib.Path(wgpf_fixture.dae_dir) / "study_1"

    # dedent() strips only the common margin, so the helper receives the
    # same text as an unindented literal would provide.
    pedigree = setup_pedigree(
        study_dir / "pedigree" / "in.ped",
        textwrap.dedent("""
        familyId personId dadId momId sex status role
        f1.1 mom1 0 0 2 1 mom
        f1.1 dad1 0 0 1 1 dad
        f1.1 ch1 dad1 mom1 2 2 prb
        f1.3 mom3 0 0 2 1 mom
        f1.3 dad3 0 0 1 1 dad
        f1.3 ch3 dad3 mom3 2 2 prb
        """))
    vcf = setup_vcf(
        study_dir / "vcf" / "in.vcf.gz",
        textwrap.dedent("""
        ##fileformat=VCFv4.2
        ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
        ##contig=<ID=chr1>
        ##contig=<ID=chr2>
        ##contig=<ID=chr3>
        #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT mom1 dad1 ch1 mom3 dad3 ch3
        chr1 1 . A C . . . GT 0/0 0/0 0/1 0/0 0/0 0/0
        chr1 2 . C G . . . GT 0/0 0/0 0/0 0/0 0/0 0/1
        chr1 3 . G T . . . GT 0/0 1/0 0/1 0/0 0/0 0/0
        """))

    import_config = {
        "input": {
            "vcf": {
                "denovo_mode": "denovo",
                "omission_mode": "omission",
            },
        },
    }
    phenotype_collection = {
        "id": "phenotype",
        "name": "Phenotype",
        "sources": [
            {
                "from": "pedigree",
                "source": "status",
            },
        ],
        "default": {
            "color": "#aaaaaa",
            "id": "unspecified",
            "name": "unspecified",
        },
        "domain": [
            {
                "color": "#bbbbbb",
                "id": "autism",
                "name": "autism",
                "values": [
                    "affected",
                ],
            },
            {
                "color": "#00ff00",
                "id": "unaffected",
                "name": "unaffected",
                "values": [
                    "unaffected",
                ],
            },
        ],
    }
    study_config = {
        "conf_dir": str(study_dir),
        "person_set_collections": {
            "phenotype": phenotype_collection,
            "selected_person_set_collections": [
                "phenotype",
            ],
        },
    }

    genotype_study = vcf_study(
        study_dir.parent,
        "study_1", pedigree, [vcf],
        wgpf_fixture,
        project_config_update=import_config,
        study_config_update=study_config,
    )
    return WDAEStudy(genotype_study, None)
[docs]
@pytest.fixture(scope="module")
def study_2(wgpf_fixture: WGPFInstance) -> WDAEStudy:
    """Import ``study_2`` from a VCF and wrap it in a ``WDAEStudy``.

    The study has families ``f2.1`` and ``f2.3``; ``f2.3`` carries an extra
    child ``ch4`` with pedigree status ``0``. Its ``phenotype`` person set
    collection maps the status ``affected`` to the ``epilepsy`` person set.
    """
    study_dir = pathlib.Path(wgpf_fixture.dae_dir) / "study_2"

    # dedent() strips only the common margin, so the helper receives the
    # same text as an unindented literal would provide.
    pedigree = setup_pedigree(
        study_dir / "pedigree" / "in.ped",
        textwrap.dedent("""
        familyId personId dadId momId sex status role
        f2.1 mom1 0 0 2 1 mom
        f2.1 dad1 0 0 1 1 dad
        f2.1 ch1 dad1 mom1 2 2 prb
        f2.3 mom3 0 0 2 1 mom
        f2.3 dad3 0 0 1 1 dad
        f2.3 ch3 dad3 mom3 2 2 prb
        f2.3 ch4 dad3 mom3 2 0 prb
        """))
    vcf = setup_vcf(
        study_dir / "vcf" / "in.vcf.gz",
        textwrap.dedent("""
        ##fileformat=VCFv4.2
        ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
        ##contig=<ID=chr1>
        ##contig=<ID=chr2>
        ##contig=<ID=chr3>
        #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT mom1 dad1 ch1 mom3 dad3 ch3 ch4
        chr1 5 . A C . . . GT 0/0 0/0 0/1 0/0 0/0 0/0 0/1
        chr1 6 . C G . . . GT 0/0 0/0 0/0 0/0 0/0 0/1 0/0
        chr1 7 . G T . . . GT 0/0 1/0 0/1 0/0 0/0 0/0 0/1
        """))

    import_config = {
        "input": {
            "vcf": {
                "denovo_mode": "denovo",
                "omission_mode": "omission",
            },
        },
    }
    phenotype_collection = {
        "id": "phenotype",
        "name": "Phenotype",
        "sources": [
            {
                "from": "pedigree",
                "source": "status",
            },
        ],
        "default": {
            "color": "#aaaaaa",
            "id": "unspecified",
            "name": "unspecified",
        },
        "domain": [
            {
                "color": "#bbbbbb",
                "id": "epilepsy",
                "name": "epilepsy",
                "values": [
                    "affected",
                ],
            },
            {
                "color": "#00ff00",
                "id": "unaffected",
                "name": "unaffected",
                "values": [
                    "unaffected",
                ],
            },
        ],
    }
    study_config = {
        "conf_dir": str(study_dir),
        "person_set_collections": {
            "phenotype": phenotype_collection,
            "selected_person_set_collections": [
                "phenotype",
            ],
        },
    }

    genotype_study = vcf_study(
        study_dir.parent,
        "study_2", pedigree, [vcf],
        wgpf_fixture,
        project_config_update=import_config,
        study_config_update=study_config,
    )
    return WDAEStudy(genotype_study, None)
[docs]
@pytest.fixture
def dataset(
    wgpf_fixture: WGPFInstance,
    study_1: WDAEStudy,
    study_2: WDAEStudy,
) -> WDAEStudyGroup:
    """Build the ``dataset`` study group combining ``study_1`` and ``study_2``.

    The group is created with ``setup_dataset`` from the genotype data of
    both studies and wrapped in a ``WDAEStudyGroup`` whose children are the
    two study wrappers. The config override enables the genotype browser
    and declares a ``phenotype`` person set collection whose affected set
    is ``developmental_disorder``.
    """
    dataset_dir = pathlib.Path(wgpf_fixture.dae_dir) / "dataset"
    dataset_dir.mkdir(exist_ok=True)

    # ``conf_dir`` points at the directory created above (no trailing space
    # in the path). The YAML nesting mirrors the dict-based configs of the
    # study fixtures; dedent() removes the common source indentation.
    config_update = textwrap.dedent(f"""
        conf_dir: {dataset_dir}
        genotype_browser:
          enabled: true
        person_set_collections:
          phenotype:
            id: phenotype
            name: Phenotype
            sources:
            - from: pedigree
              source: status
            domain:
            - color: '#4b2626'
              id: developmental_disorder
              name: developmental disorder
              values:
              - affected
            - color: '#ffffff'
              id: unaffected
              name: unaffected
              values:
              - unaffected
            default:
              color: '#aaaaaa'
              id: unspecified
              name: unspecified
          selected_person_set_collections:
          - phenotype""")

    genotype_group = setup_dataset(
        "dataset", wgpf_fixture,
        study_1.genotype_data, study_2.genotype_data,
        dataset_config_update=config_update,
    )
    return WDAEStudyGroup(genotype_group, None, children=[study_1, study_2])
[docs]
def test_dataset_simple(dataset: WDAEStudyGroup) -> None:
    """Check that the group's ``phenotype`` collection has all person sets.

    The expected ids are the two study-specific affected sets plus the
    ``unaffected`` and ``unspecified`` sets.
    """
    assert dataset is not None
    assert dataset.person_set_collections
    assert "phenotype" in dataset.person_set_collections

    collection = dataset.genotype_data.get_person_set_collection("phenotype")
    assert collection is not None

    expected_ids = ("autism", "epilepsy", "unaffected", "unspecified")
    for person_set_id in expected_ids:
        assert person_set_id in collection.person_sets
[docs]
@pytest.mark.parametrize(
    "psc_query, count", [
        (PSCQuery("phenotype", {"epilepsy"}), 3),
        (PSCQuery("phenotype", {"autism"}), 3),
        (PSCQuery("phenotype", {"unaffected"}), 2),
        (PSCQuery("phenotype", {"unspecified"}), 2),
        (PSCQuery("phenotype", {"epilepsy", "autism"}), 6),
        (PSCQuery("phenotype", {"unaffected", "autism"}), 4),
        (PSCQuery("phenotype", {"epilepsy", "autism", "unaffected"}), 6),
        (PSCQuery("phenotype",
                  {"epilepsy", "autism", "unaffected", "unspecified"}), 8),
    ],
)
def test_dataset_person_sets_queries(
    dataset: WDAEStudy,
    query_transformer: QueryTransformer,
    response_transformer: ResponseTransformer,
    psc_query: PSCQuery,
    count: int,
) -> None:
    """Check how many rows a person-set-collection query streams back.

    Each case pairs a ``PSCQuery`` with the expected number of items
    produced by ``query_variants_wdae_streaming`` for that selection.
    """
    person_set_filter = {
        "id": psc_query.psc_id,
        "checkedValues": psc_query.selected_person_sets,
    }
    wdae_query = {"personSetCollection": person_set_filter}

    streamed = dataset.query_variants_wdae_streaming(
        wdae_query, [],
        query_transformer, response_transformer,
    )
    rows = list(streamed)

    assert len(rows) == count