# pylint: disable=W0621,C0114,C0116,W0212,W0613
import textwrap
from pathlib import Path
from typing import Any
import pytest
from dae.duckdb_storage.duckdb_genotype_storage import duckdb_storage_factory
from dae.testing import setup_dataset, setup_pedigree, setup_vcf, vcf_study
from dae.testing.alla_import import alla_gpf
from gpf_instance.gpf_instance import WGPFInstance
from studies.query_transformer import QueryTransformer, make_query_transformer
from studies.study_wrapper import WDAEStudyGroup
@pytest.fixture(scope="module")
def instance_path(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Return a module-scoped temporary directory for the test instance.

    The directory is created through pytest's ``tmp_path_factory`` and is
    shared by every fixture and test in this module.
    """
    base_dir = tmp_path_factory.mktemp("test_unique_family_variants_query")
    return base_dir
@pytest.fixture(scope="module")
def wgpf_instance(instance_path: Path) -> WGPFInstance:
    """Build a ``WGPFInstance`` backed by a DuckDB genotype storage.

    An ``alla_gpf`` instance is first created under ``instance_path`` with a
    DuckDB storage whose ``base_dir`` is that same directory; its
    configuration, directory, config path and GRR are then handed over to a
    new ``WGPFInstance``.
    """
    genotype_storage = duckdb_storage_factory({
        "id": "duckdb",
        "storage_type": "duckdb",
        "db": "duckdb2_storage/storage2.db",
        "base_dir": str(instance_path),
    })
    base_instance = alla_gpf(instance_path, genotype_storage)

    return WGPFInstance(
        base_instance.dae_config,
        base_instance.dae_dir,
        base_instance.dae_config_path,
        grr=base_instance.grr,
    )
def _phenotype_collections_config(
    affected_id: str,
    affected_name: str,
    affected_color: str,
    unaffected_color: str,
) -> dict[str, Any]:
    """Build the ``person_set_collections`` section of a study config.

    The section defines a single ``phenotype`` collection sourced from the
    pedigree ``status`` column. Only the person set that "affected" people
    map to, and the color of the "unaffected" set, differ between studies.

    Args:
        affected_id: id of the person set holding "affected" individuals.
        affected_name: display name of that person set.
        affected_color: color of that person set.
        unaffected_color: color of the "unaffected" person set.

    Returns:
        A dict suitable as the ``person_set_collections`` value of a
        ``study_config_update``.
    """
    return {
        "phenotype": {
            "id": "phenotype",
            "name": "Phenotype",
            "sources": [
                {
                    "from": "pedigree",
                    "source": "status",
                },
            ],
            "default": {
                "color": "#cccccc",
                "id": "unknown",
                "name": "unknown",
            },
            "domain": [
                {
                    "color": affected_color,
                    "id": affected_id,
                    "name": affected_name,
                    "values": [
                        "affected",
                    ],
                },
                {
                    "color": unaffected_color,
                    "id": "unaffected",
                    "name": "unaffected",
                    "values": [
                        "unaffected",
                    ],
                },
                {
                    "color": "#aaaaaa",
                    "id": "unspecified",
                    "name": "unspecified",
                    "values": [
                        "unspecified",
                    ],
                },
            ],
        },
        "selected_person_set_collections": [
            "phenotype",
        ],
    }


@pytest.fixture(scope="module")
def dataset(
    instance_path: Path,
    wgpf_instance: WGPFInstance,
) -> WDAEStudyGroup:
    """Import two VCF studies and group them into the ``ds1`` dataset.

    ``study_1`` and ``study_2`` share the same pedigree (two trio families)
    but carry different genotypes and map "affected" people to different
    person sets: ``developmental_disorder`` in ``study_1`` and ``autism`` in
    ``study_2``. The dataset's ``phenotype`` collection lists both sets.

    Returns:
        The wdae wrapper of dataset ``ds1``.
    """
    root_path = instance_path

    # Both studies use exactly the same families.
    pedigree = textwrap.dedent("""
        familyId personId dadId momId sex status role
        f1 mom1 0 0 2 1 mom
        f1 dad1 0 0 1 1 dad
        f1 ch1 dad1 mom1 2 2 prb
        f2 mom2 0 0 2 1 mom
        f2 dad2 0 0 1 1 dad
        f2 ch2 dad2 mom2 2 2 prb
        """)

    ped_path1 = setup_pedigree(root_path / "study_1" / "in.ped", pedigree)
    vcf_path1 = setup_vcf(
        root_path / "study_1" / "in.vcf.gz",
        textwrap.dedent("""
        ##fileformat=VCFv4.2
        ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
        ##contig=<ID=chrA>
        #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT mom1 dad1 ch1 dad2 ch2 mom2
        chrA 1 . A C,G . . . GT 0/1 0/2 1/0 0/1 1/0 0/0
        chrA 2 . A C . . . GT 0/0 0/1 0/0 0/1 0/0 0/1
        """))

    ped_path2 = setup_pedigree(root_path / "study_2" / "in.ped", pedigree)
    vcf_path2 = setup_vcf(
        root_path / "study_2" / "in.vcf.gz",
        textwrap.dedent("""
        ##fileformat=VCFv4.2
        ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
        ##contig=<ID=chrA>
        #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT mom1 dad1 ch1 dad2 ch2 mom2
        chrA 1 . A C,G . . . GT 0/1 0/2 0/0 0/0 0/1 0/0
        chrA 2 . A C . . . GT 0/1 0/0 0/0 0/1 0/0 0/1
        """))

    study1 = vcf_study(
        root_path,
        "study_1", ped_path1, [vcf_path1],
        wgpf_instance,
        study_config_update={
            "conf_dir": str(root_path / "study_1"),
            "person_set_collections": _phenotype_collections_config(
                affected_id="developmental_disorder",
                affected_name="developmental disorder",
                affected_color="#bbbbbb",
                unaffected_color="#00ff00",
            ),
        })
    study2 = vcf_study(
        root_path,
        "study_2", ped_path2, [vcf_path2],
        wgpf_instance,
        study_config_update={
            "conf_dir": str(root_path / "study_2"),
            "person_set_collections": _phenotype_collections_config(
                affected_id="autism",
                affected_name="autism",
                affected_color="#ff0000",
                unaffected_color="#ffffff",
            ),
        })

    (root_path / "dataset").mkdir(exist_ok=True)
    # NOTE(review): presumably reloads the instance so the freshly imported
    # studies are visible before the dataset is assembled - confirm.
    wgpf_instance.reload()

    # The YAML below mirrors the nesting of the study configs above; the
    # "affected" status appears in two person sets because each study maps
    # it to a different one.
    setup_dataset(
        "ds1", wgpf_instance, study1, study2,
        dataset_config_update=textwrap.dedent(f"""
            conf_dir: {root_path / "dataset"}
            person_set_collections:
              phenotype:
                id: phenotype
                name: Phenotype
                sources:
                - from: pedigree
                  source: status
                domain:
                - color: '#4b2626'
                  id: developmental_disorder
                  name: developmental disorder
                  values:
                  - affected
                - color: '#ff0000'
                  id: autism
                  name: autism
                  values:
                  - affected
                - color: '#ffffff'
                  id: unaffected
                  name: unaffected
                  values:
                  - unaffected
                - color: '#aaaaaa'
                  id: unspecified
                  name: unspecified
                  values:
                  - unspecified
                default:
                  color: '#cccccc'
                  id: unknown
                  name: unknown
              selected_person_set_collections:
              - phenotype
            """),
    )

    wrapper = wgpf_instance.get_wdae_wrapper("ds1")
    # Narrows the wrapper type for the declared return annotation.
    assert isinstance(wrapper, WDAEStudyGroup)
    return wrapper
@pytest.mark.parametrize(
    "person_set_collection, count",
    [
        (
            {"id": "phenotype", "checkedValues": ["autism"]},
            1,
        ),
        (
            {"id": "phenotype", "checkedValues": ["developmental_disorder"]},
            2,
        ),
    ],
)
def test_person_set_collection_queries(
    dataset: WDAEStudyGroup,
    query_transformer: QueryTransformer,
    person_set_collection: dict[str, Any],
    count: int,
) -> None:
    """Check how many variants a person-set-collection filter returns.

    Each case selects one person set of the ``phenotype`` collection and
    expects ``count`` raw variants from the dataset query.
    """
    request: dict[str, Any] = {}
    request["personSetCollection"] = person_set_collection

    variants = [*dataset.query_variants_raw(request, query_transformer)]

    assert len(variants) == count