from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any, cast
from jinja2 import Template
from dae.annotation.annotation_pipeline import AnnotationPipeline, AttributeInfo
from dae.annotation.score_annotator import GenomicScoreAnnotatorBase
from dae.genomic_resources.genomic_scores import (
GenomicScore,
build_score_from_resource,
)
from dae.genomic_resources.histogram import (
CategoricalHistogram,
Histogram,
NullHistogram,
NumberHistogram,
)
logger = logging.getLogger(__name__)
@dataclass
class ScoreDesc:
    """Description of a single genomic score held in the scores registry.

    Bundles the attribute name, the originating resource and source score,
    the score histogram, and the human readable description/help texts.
    """

    name: str
    resource_id: str
    source: str
    hist: Histogram
    description: str
    help: str
    small_values_desc: str | None
    large_values_desc: str | None

    def to_json(self) -> dict[str, Any]:
        """Return a JSON-serializable dictionary for this description.

        The histogram is converted with its own ``to_dict`` method; all
        other fields are copied as they are.
        """
        serialized_hist = self.hist.to_dict()
        payload: dict[str, Any] = {
            "name": self.name,
            "resource_id": self.resource_id,
            "source": self.source,
            "hist": serialized_hist,
        }
        payload["description"] = self.description
        payload["help"] = self.help
        payload["small_values_desc"] = self.small_values_desc
        payload["large_values_desc"] = self.large_values_desc
        return payload

    @staticmethod
    def from_json(data: dict[str, Any]) -> ScoreDesc:
        """Build a ScoreDesc from the dictionary produced by ``to_json``.

        The histogram class is selected by ``data["hist"]["config"]["type"]``.
        The two ``*_values_desc`` entries are optional and default to None.

        Raises:
            ValueError: if the histogram type is not one of
                ``categorical``, ``null`` or ``number``.
        """
        assert "hist" in data
        hist_payload = data["hist"]
        hist_kind = hist_payload["config"]["type"]

        histogram: Histogram
        if hist_kind == "number":
            histogram = NumberHistogram.from_dict(hist_payload)
        elif hist_kind == "categorical":
            histogram = CategoricalHistogram.from_dict(hist_payload)
        elif hist_kind == "null":
            histogram = NullHistogram.from_dict(hist_payload)
        else:
            raise ValueError(f"Unknown histogram type {hist_kind}")

        return ScoreDesc(
            name=data["name"],
            resource_id=data["resource_id"],
            source=data["source"],
            hist=histogram,
            description=data["description"],
            help=data["help"],
            small_values_desc=data.get("small_values_desc"),
            large_values_desc=data.get("large_values_desc"),
        )
# Jinja2 template for the histogram fragment of a score help text.
# `_build_score_help` renders it and stores the result under the
# "histogram" key of the data passed to GENOMIC_SCORE_HELP. The only
# variable the template references is `hist_url`, used as the target of a
# Markdown image link.
SCORE_HISTOGRAM = """
<div class="modal-histogram">
<div class="histogram-image">
![HISTOGRAM]({{ hist_url }})
</div>
</div>
"""
# Jinja2 template for the complete help text of one score attribute.
# It mixes HTML and Markdown markup and is rendered by `_build_score_help`
# with a single `data` mapping. Keys read by the template: name,
# description, resource_summary, histogram, resource_url, resource_id,
# source, aggregators (iterated, one bullet per item), resource_type,
# annotator_type and annotator_doc.
GENOMIC_SCORE_HELP = """
<div class="score-description">
## {{ data.name }}
{{ data.description}}
{{ data.resource_summary }}
{{ data.histogram }}
Genomic resource:
<a href={{data.resource_url}} target="_blank">{{ data.resource_id }}</a>
<details>
<summary class="details">
#### Details
</summary>
<div class="details-body">
##### Attribute properties:
* **source**: {{ data.source }}
{% for aggregator in data.aggregators %}
* {{ aggregator }}
{% endfor %}
##### Resource properties:
* **resource_type**: `{{ data.resource_type }}`
##### Annotator documentation:
* **annotator_type**: `{{ data.annotator_type }}`
{{ data.annotator_doc }}
</div>
</details>
</div>
"""
def _build_score_help(
    score_annotator: GenomicScoreAnnotatorBase,
    attr_info: AttributeInfo,
    genomic_score: GenomicScore,
) -> str:
    """Render the help text for a single score attribute.

    The histogram fragment is rendered first from SCORE_HISTOGRAM and then
    embedded, together with attribute, resource and annotator details, into
    the GENOMIC_SCORE_HELP template.

    Args:
        score_annotator: annotator producing the attribute; supplies the
            aggregator documentation and the annotator type/documentation.
        attr_info: the attribute being documented.
        genomic_score: score object backing the attribute; supplies the
            histogram image URL, the score definition and the resource.

    Returns:
        The rendered help text.
    """
    histogram_url = genomic_score.get_histogram_image_url(attr_info.source)
    definition = genomic_score.get_score_definition(attr_info.source)
    assert definition is not None

    histogram_fragment = Template(SCORE_HISTOGRAM).render(
        hist_url=histogram_url,
        score_def=definition,
    )

    # Collect the pieces in named intermediates before assembling the
    # template context.
    resource = genomic_score.resource
    summary = resource.get_summary()
    resource_url = f"{resource.get_url()}/index.html"
    resource_type = resource.get_type()
    aggregator_docs = score_annotator.build_score_aggregator_documentation(
        attr_info,
    )
    annotator_info = score_annotator.get_info()

    context = {
        "name": attr_info.name,
        "description": attr_info.description,
        "resource_id": genomic_score.resource_id,
        "resource_summary": summary,
        "resource_url": resource_url,
        "resource_type": resource_type,
        "histogram": histogram_fragment,
        "source": attr_info.source,
        "aggregators": aggregator_docs,
        "annotator_type": annotator_info.type,
        "annotator_doc": annotator_info.documentation,
    }
    return Template(GENOMIC_SCORE_HELP).render(data=context)
class GenomicScoresRegistry:
    """Genomic scores registry allowing access to genomic scores histograms.

    The registry behaves like a read-only mapping from score (attribute)
    name to its ScoreDesc: it supports ``registry[score_id]``,
    ``score_id in registry`` and ``len(registry)``.
    """

    # Annotator types whose attributes are collected into the registry.
    _SCORE_ANNOTATOR_TYPES = frozenset(
        {"position_score", "np_score", "allele_score"})

    def __init__(self, scores: dict[str, ScoreDesc]):
        """Create a registry holding a shallow copy of *scores*."""
        self.scores: dict[str, ScoreDesc] = dict(scores)
        logger.info(
            "genomic scores histograms loaded: %s", list(self.scores))

    @staticmethod
    def build_genomic_scores_registry(
        pipeline: AnnotationPipeline,
    ) -> GenomicScoresRegistry:
        """Build a genomic scores registry from an annotation pipeline.

        Only annotators whose type is one of ``position_score``,
        ``np_score`` or ``allele_score`` contribute scores. A ``None``
        pipeline, or one without annotators, yields an empty registry.
        """
        score_annotators: list[GenomicScoreAnnotatorBase] = []
        if pipeline is not None:
            score_annotators = [
                cast(GenomicScoreAnnotatorBase, annotator)
                for annotator in pipeline.annotators
                if annotator.get_info().type
                in GenomicScoresRegistry._SCORE_ANNOTATOR_TYPES
            ]

        annotation_scores: dict[str, ScoreDesc] = {}
        for score_annotator in score_annotators:
            annotation_scores.update(
                GenomicScoresRegistry._build_annotator_scores_desc(
                    score_annotator))
        return GenomicScoresRegistry(annotation_scores)

    @staticmethod
    def _build_annotator_scores_desc(
        annotator: GenomicScoreAnnotatorBase,
    ) -> dict[str, ScoreDesc]:
        """Build score descriptions for the attributes of one annotator.

        The score is built from the annotator's first resource. Internal
        attributes are skipped, and so are attributes whose histogram is
        not available (a warning is logged for those).

        Returns:
            Mapping from attribute name to its ScoreDesc.
        """
        annotator_info = annotator.get_info()
        resource = annotator_info.resources[0]
        score = build_score_from_resource(resource)

        result: dict[str, ScoreDesc] = {}
        for attr in annotator_info.attributes:
            if attr.internal:
                continue
            score_def = score.score_definitions[attr.source]

            # Check the histogram before rendering the help document, so
            # no help text is built for an attribute that gets skipped.
            histogram = score.get_score_histogram(attr.source)
            if histogram is None:
                logger.warning(
                    "unable to load histogram for %s: %s (%s)",
                    score.resource_id, attr.name, attr.source)
                continue

            result[attr.name] = ScoreDesc(
                attr.name,
                resource.resource_id,
                attr.source,
                histogram,
                f"{attr.name} - {attr.description}",
                _build_score_help(annotator, attr, score),
                score_def.small_values_desc,
                score_def.large_values_desc,
            )
        return result

    def get_scores(self) -> list[tuple[str, ScoreDesc]]:
        """Return all genomic scores as ``(score_id, ScoreDesc)`` pairs."""
        return list(self.scores.items())

    def __getitem__(self, score_id: str) -> ScoreDesc:
        """Return the description of *score_id*.

        Raises:
            KeyError: if the score is not registered; the exception
                carries the missing score id.
        """
        return self.scores[score_id]

    def __contains__(self, score_id: str) -> bool:
        """Return True if *score_id* is registered."""
        return score_id in self.scores

    def __len__(self) -> int:
        """Return the number of registered scores."""
        return len(self.scores)