# pylint: disable=W0621,C0114,C0116,W0212,W0613
import json
import pathlib
import shutil
import textwrap
from collections.abc import Callable, Generator
import numpy as np
import pytest
import pytest_mock
from dae.enrichment_tool.build_coding_length_enrichment_background import (
cli as build_coding_len_background_cli,
)
from dae.enrichment_tool.enrichment_utils import (
get_enrichment_config,
)
from dae.enrichment_tool.gene_weights_background import (
GeneScoreEnrichmentBackground,
)
from dae.enrichment_tool.samocha_background import SamochaEnrichmentBackground
from dae.genomic_resources.group_repository import GenomicResourceGroupRepo
from dae.genomic_resources.repository import (
GR_CONF_FILE_NAME,
GenomicResourceRepo,
)
from dae.genomic_resources.repository_factory import (
build_genomic_resource_repository,
)
from dae.genomic_resources.testing import (
build_filesystem_test_repository,
build_inmemory_test_repository,
convert_to_tab_separated,
setup_directories,
setup_pedigree,
setup_vcf,
)
from dae.gpf_instance import GPFInstance
from dae.pedigrees.families_data import FamiliesData
from dae.studies.study import GenotypeData
from dae.testing import (
setup_empty_gene_models,
setup_genome,
setup_gpf_instance,
)
from dae.testing.import_helpers import vcf_study
from dae.testing.t4c8_import import t4c8_genes, t4c8_genome
from dae.variants.attributes import Inheritance
from dae.variants.family_variant import FamilyVariant
from dae.variants.variant import SummaryVariantFactory
from studies.study_wrapper import WDAEStudy
from enrichment_api.enrichment_builder import EnrichmentBuilder
from enrichment_api.enrichment_helper import EnrichmentHelper
from enrichment_api.enrichment_serializer import EnrichmentSerializer
[docs]
@pytest.fixture(scope="session")
def coding_len_background(
    grr: GenomicResourceRepo,
) -> GeneScoreEnrichmentBackground:
    """Provide the loaded coding-length enrichment background.

    Fetches the ``enrichment/coding_len_testing`` resource from ``grr``,
    checks that it is a ``gene_score`` resource and that the background
    built from it is named ``CodingLenBackground``, then loads and
    returns the background.
    """
    resource = grr.get_resource("enrichment/coding_len_testing")
    assert resource.get_type() == "gene_score"

    coding_len = GeneScoreEnrichmentBackground(resource)
    assert coding_len is not None
    assert coding_len.name == "CodingLenBackground"

    coding_len.load()
    return coding_len
[docs]
@pytest.fixture(scope="session")
def samocha_background(
    grr: GenomicResourceRepo,
) -> SamochaEnrichmentBackground:
    """Provide the loaded Samocha enrichment background.

    Fetches the ``enrichment/samocha_testing`` resource from ``grr``,
    checks the resource type plus the background's name and type, then
    loads and returns the background.
    """
    expected_type = "samocha_enrichment_background"

    resource = grr.get_resource("enrichment/samocha_testing")
    assert resource.get_type() == expected_type

    samocha = SamochaEnrichmentBackground(resource)
    assert samocha is not None
    assert samocha.name == "Samocha's enrichment background model"
    assert samocha.background_type == expected_type

    samocha.load()
    return samocha
[docs]
@pytest.fixture(scope="session")
def grr() -> GenomicResourceRepo:
    """Build the in-memory genomic resource repository used by the tests.

    The repository holds three resources under ``enrichment/``:

    * ``coding_len_testing_deprecated`` - configured with type
      ``gene_weights_enrichment_background``;
    * ``coding_len_testing`` - configured with type ``gene_score`` and a
      single ``gene_weight`` score named ``CodingLenBackground``;
    * ``samocha_testing`` - configured with type
      ``samocha_enrichment_background``.

    Multi-line payloads are written flush-left so the embedded text
    carries no extra leading whitespace.
    """
    return build_inmemory_test_repository({
        "enrichment": {
            "coding_len_testing_deprecated": {
                GR_CONF_FILE_NAME: """
type: gene_weights_enrichment_background
filename: data.mem
name: CodingLenBackground
""",
                "data.mem": convert_to_tab_separated("""
gene gene_weight
SAMD11 3
PLEKHN1 7
POGZ 13
"""),
            },
            "coding_len_testing": {
                # The score's name/desc/histogram must stay nested under
                # the `scores` entry: flattened to the top level, the
                # histogram's `type: number` would clash with the
                # resource-level `type: gene_score`.
                GR_CONF_FILE_NAME: """
type: gene_score
filename: data.mem
separator: "\t"
scores:
- id: gene_weight
  name: CodingLenBackground
  desc: Gene coding length enrichment background model
  histogram:
    type: number
    number_of_bins: 10
    view_range:
      min: 0
      max: 20
""",
                "data.mem": convert_to_tab_separated("""
gene gene_weight
SAMD11 3
PLEKHN1 7
POGZ 13
"""),
            },
            "samocha_testing": {
                GR_CONF_FILE_NAME: """
type: samocha_enrichment_background
filename: data.mem
""",
                "data.mem": convert_to_tab_separated("""
"transcript","gene","bp","all","synonymous","missense","nonsense","splice-site","frame-shift","F","M","P_LGDS","P_MISSENSE","P_SYNONYMOUS"
"NM_017582","SAMD11",3,-1,-5,-4,-6,-6,-6,2,2,1.1,1.4,5.7
"NM_017582","PLEKHN1",7,-2,-5,-4,-6,-6,-6,2,2,1.2,1.5,5.8
"NM_014372","POGZ",11,-3,-5,-5,-6,-7,-6,2,2,6.3,4.6,2.9
"""),
            },
        },
    })
[docs]
@pytest.fixture
def t4c8_fixture(
    tmp_path: pathlib.Path,
    grr: GenomicResourceRepo,
) -> GPFInstance:
    """Create a GPF instance backed by the t4c8 genome and gene models.

    Steps performed under ``tmp_path``:

    1. ``t4c8_genes`` and ``t4c8_genome`` are invoked on ``<tmp>/grr``.
    2. A ``grr_definition.yaml`` pointing at that directory is written.
    3. The coding-length background CLI is run for ``t4c8_genes`` and
       writes ``coding_len_background.tsv`` into
       ``<tmp>/grr/coding_len_background``.
    4. A ``genomic_resource.yaml`` describing that file as a
       ``gene_score`` resource is written next to it.
    5. The directory repository is grouped with the shared ``grr``
       fixture and passed to ``setup_gpf_instance``.

    Returns:
        The GPF instance created in ``<tmp>/gpf_instance``.
    """
    root_path = tmp_path
    grr_dir = root_path / "grr"

    t4c8_genes(grr_dir)
    t4c8_genome(grr_dir)

    setup_directories(root_path, {
        "grr_definition.yaml": textwrap.dedent(f"""
            id: t4c8_genes_testing
            type: dir
            directory: {grr_dir}
        """),
    })

    coding_len_background_path = grr_dir / "coding_len_background"
    coding_len_background_path.mkdir(parents=True, exist_ok=True)
    build_coding_len_background_cli([
        "--grr", str(root_path / "grr_definition.yaml"),
        "-o", str(coding_len_background_path / "coding_len_background.tsv"),
        "t4c8_genes",
    ])

    # The score's name/desc/histogram must stay nested under the
    # `scores` entry: flattened to the top level, the histogram's
    # `type: number` would clash with the resource-level
    # `type: gene_score`.
    setup_directories(coding_len_background_path, {
        "genomic_resource.yaml": textwrap.dedent("""
            type: gene_score
            filename: coding_len_background.tsv
            separator: "\t"
            scores:
            - id: gene_weight
              name: t4c8CodingLenBackground
              desc: Gene coding length enrichment background model
              histogram:
                type: number
                number_of_bins: 10
                view_range:
                  min: 0
                  max: 20
        """),
    })

    local_grr = build_filesystem_test_repository(grr_dir)
    # Use a separate name instead of rebinding the `grr` parameter.
    combined_grr = GenomicResourceGroupRepo(
        [grr, local_grr], "enrichment_testing_repo",
    )
    return setup_gpf_instance(
        root_path / "gpf_instance",
        reference_genome_id="t4c8_genome",
        gene_models_id="t4c8_genes",
        grr=combined_grr)
[docs]
@pytest.fixture(scope="session")
def study_data(
    tmp_path_factory: pytest.TempPathFactory,
) -> tuple[pathlib.Path, pathlib.Path]:
    """Write the pedigree and VCF inputs shared by the study fixtures.

    Both files are created once per session inside a ``test_study``
    directory under a fresh temporary directory.

    Returns:
        A ``(pedigree_path, vcf_path)`` pair as returned by
        ``setup_pedigree`` and ``setup_vcf``.
    """
    study_dir = tmp_path_factory.mktemp("test_study_data") / "test_study"

    pedigree_content = """
familyId personId dadId momId sex status role phenotype
f1 mom1 0 0 2 1 mom unaffected
f1 dad1 0 0 1 1 dad unaffected
f1 ch1 dad1 mom1 2 2 prb phenotype1
f2 mom2 0 0 2 1 mom unaffected
f2 dad2 0 0 1 1 dad unaffected
f2 ch2 dad2 mom2 1 2 prb phenotype1
f2 ch2.1 dad2 mom2 2 1 sib unaffected
"""
    pedigree_file = setup_pedigree(study_dir / "in.ped", pedigree_content)

    vcf_content = """
##fileformat=VCFv4.2
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=EFF,Number=1,Type=String,Description="Effect">
##contig=<ID=1>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT mom1 dad1 ch1 mom2 dad2 ch2 ch2.1
1 865583 . G A . . . GT 0/0 0/1 0/0 0/0 0/1 0/0 0/0
1 865624 . G A . . . GT 0/0 0/0 0/1 0/0 0/0 0/1 0/0
1 865664 . G A . . . GT 0/0 0/0 0/1 0/0 0/0 0/1 0/0
1 901923 . C A . . . GT 0/1 0/0 0/0 0/1 0/0 0/0 0/0
1 905957 . C T . . . GT 0/0 0/0 0/0 0/0 0/0 0/0 0/1
"""  # noqa: E501
    vcf_file = setup_vcf(study_dir / "in.vcf", vcf_content)

    return pedigree_file, vcf_file
[docs]
@pytest.fixture
def create_test_study(
    tmp_path_factory: pytest.TempPathFactory,
    study_data: tuple[pathlib.Path, pathlib.Path],
    t4c8_fixture: GPFInstance,
) -> Generator[Callable[[dict], GenotypeData], None, None]:
    """Yield a factory that imports the ``f1_trio`` study on demand.

    The yielded callable takes a study configuration update dict and
    returns the study produced by ``vcf_study`` from the shared pedigree
    and VCF files, using the ``t4c8_fixture`` GPF instance.

    On teardown ``<dae_dir>/studies/f1_trio`` is removed if it exists.
    """
    study_path = tmp_path_factory.mktemp("f1_trio")
    ped_path, vcf_path = study_data

    def _create_study(study_config: dict) -> GenotypeData:
        """Import the ``f1_trio`` study with ``study_config`` applied."""
        return vcf_study(
            study_path, "f1_trio", ped_path, [vcf_path], t4c8_fixture,
            study_config_update=study_config)

    yield _create_study

    study_dir = pathlib.Path(t4c8_fixture.dae_dir, "studies", "f1_trio")
    # A test may never call the factory (or the import may fail early),
    # in which case there is nothing to clean up; an unconditional
    # rmtree would then turn teardown into a FileNotFoundError.
    if study_dir.exists():
        shutil.rmtree(str(study_dir))
[docs]
@pytest.fixture
def enrichment_helper(
    grr: GenomicResourceRepo,
    f1_trio: GenotypeData,
) -> EnrichmentHelper:
    """Provide an ``EnrichmentHelper`` for the ``f1_trio`` study.

    The study is wrapped in a ``WDAEStudy`` whose second argument is
    ``None``.
    """
    wrapped_study = WDAEStudy(f1_trio, None)
    return EnrichmentHelper(grr, wrapped_study)
[docs]
@pytest.fixture
def enrichment_builder(
    f1_trio: GenotypeData,
    enrichment_helper: EnrichmentHelper,
) -> EnrichmentBuilder:
    """Provide an ``EnrichmentBuilder`` for the ``f1_trio`` study.

    The builder receives the shared ``enrichment_helper`` and a fresh
    ``WDAEStudy`` wrapper (second argument ``None``) around the study.
    """
    wrapped_study = WDAEStudy(f1_trio, None)
    return EnrichmentBuilder(enrichment_helper, wrapped_study)
[docs]
@pytest.fixture
def enrichment_serializer(
    f1_trio: GenotypeData,
    enrichment_builder: EnrichmentBuilder,
) -> EnrichmentSerializer:
    """Provide a serializer over enrichment results for three genes.

    The study's enrichment configuration must be present. Results are
    built for SAMD11, PLEKHN1 and POGZ using the
    ``enrichment/coding_len_testing`` background and the
    ``enrichment_events_counting`` counting.
    """
    config = get_enrichment_config(f1_trio)
    assert config is not None

    gene_symbols = ["SAMD11", "PLEKHN1", "POGZ"]
    results = enrichment_builder.build_results(
        gene_syms=gene_symbols,
        background_id="enrichment/coding_len_testing",
        counting_id="enrichment_events_counting",
    )
    return EnrichmentSerializer(config, results)
[docs]
@pytest.fixture
def psc_config(
) -> dict:
    """Provide a person-set-collection study configuration update.

    Defines a single ``phenotype`` collection sourced from the pedigree
    ``phenotype`` column, with person sets ``phenotype1``..``phenotype3``
    and ``unaffected``, and an ``unknown`` default. The
    ``selected_person_set_collections`` list sits inside the
    ``person_set_collections`` mapping, next to the collection itself.
    """
    # (id, display name, color); each set matches exactly its own id.
    domain_spec = (
        ("phenotype1", "phenotype 1", "#111111"),
        ("phenotype2", "phenotype 2", "#222222"),
        ("phenotype3", "phenotype 3", "#333333"),
        ("unaffected", "unaffected", "#aaaaaa"),
    )
    domain = [
        {
            "color": color,
            "id": set_id,
            "name": display_name,
            "values": [set_id],
        }
        for set_id, display_name, color in domain_spec
    ]

    phenotype_collection = {
        "id": "phenotype",
        "name": "Phenotype",
        "sources": [
            {"from": "pedigree", "source": "phenotype"},
        ],
        "default": {
            "color": "#aaaaaa",
            "id": "unknown",
            "name": "unknown",
        },
        "domain": domain,
    }

    return {
        "person_set_collections": {
            "phenotype": phenotype_collection,
            "selected_person_set_collections": ["phenotype"],
        },
    }
[docs]
def f1_trio_variants(
    f1_trio_families: FamiliesData,
) -> list:
    """Rebuild family variants from ``fixtures/f1_trio_variants.json``.

    The JSON file, located next to this module, holds a sequence of
    ``(summary_records, family_record)`` pairs. Each pair yields one
    ``FamilyVariant`` built for the family named by
    ``family_record["family_id"]``. When the family record carries
    ``family_variant_attributes``, they are applied to the variant's
    alternative alleles one-to-one.

    Returns:
        The family variants, in file order.
    """
    fixture_file = (
        pathlib.Path(__file__).parent / "fixtures" / "f1_trio_variants.json"
    )
    records = json.loads(fixture_file.read_text())

    variants = []
    for summary_records, family_record in records:
        summary_variant = SummaryVariantFactory.summary_variant_from_records(
            summary_records)

        # JSON object keys are strings; they are converted to ints here.
        member_inheritance = {}
        members = family_record["inheritance_in_members"]
        for member_key, raw_values in members.items():
            member_inheritance[int(member_key)] = [
                Inheritance.from_value(raw) for raw in raw_values
            ]

        allele_attributes = family_record.get("family_variant_attributes")
        family_variant = FamilyVariant(
            summary_variant,
            f1_trio_families[family_record["family_id"]],
            np.array(family_record["genotype"]),
            np.array(family_record["best_state"]),
            inheritance_in_members=member_inheritance,
        )

        if allele_attributes:
            # strict=True: a length mismatch raises instead of truncating.
            allele_pairs = zip(
                family_variant.family_alt_alleles,
                allele_attributes,
                strict=True,
            )
            for allele, attributes in allele_pairs:
                allele.update_attributes(attributes)

        variants.append(family_variant)

    return variants
[docs]
@pytest.fixture
def f1_trio(
    tmp_path: pathlib.Path,
    study_data: tuple[pathlib.Path, pathlib.Path],
    psc_config: dict,
    grr: GenomicResourceRepo,
    mocker: pytest_mock.MockerFixture,
) -> GenotypeData:
    """Provide the ``f1_trio`` study with ``query_variants`` patched.

    A reference genome file (a single contig ``1`` of one million ``A``
    bases) and empty gene models are written under
    ``<tmp>/alla_gpf`` and exposed through a directory repository. A GPF
    instance is created on top of that repository, the shared pedigree
    and VCF are imported as study ``f1_trio`` with ``psc_config``
    applied, and ``query_variants`` on the study is patched to return
    the variants built by ``f1_trio_variants``.
    """
    resources_dir = tmp_path / "alla_gpf"
    contig_sequence = 1_000_000 * "A"

    setup_genome(
        resources_dir / "genome" / "allChr.fa",
        f"""
>1
{contig_sequence}
""",
    )
    setup_empty_gene_models(
        resources_dir / "empty_gene_models" / "empty_genes.txt")

    local_repo = build_genomic_resource_repository({
        "id": "alla_local",
        "type": "directory",
        "directory": str(resources_dir),
    })
    # NOTE(review): this combined repository is constructed but never
    # used - the GPF instance below is given `local_repo` only. Confirm
    # whether the group repo was meant to be passed instead.
    grr = GenomicResourceGroupRepo(
        [grr, local_repo], "enrichment_testing_repo",
    )

    instance = setup_gpf_instance(
        tmp_path / "gpf_instance",
        reference_genome_id="genome",
        gene_models_id="empty_gene_models",
        grr=local_repo)

    pedigree_file, vcf_file = study_data
    trio_study = vcf_study(
        tmp_path / "f1_trio",
        "f1_trio",
        pedigree_file,
        vcf_paths=[vcf_file],
        gpf_instance=instance,
        study_config_update=psc_config,
    )

    # The canned variants are built once, up front, and returned by
    # every call to the patched method.
    canned_variants = f1_trio_variants(trio_study.families)
    mocker.patch.object(
        trio_study,
        "query_variants",
        return_value=canned_variants,
    )
    return trio_study