from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any, cast
from gain.annotation.annotation_pipeline import AnnotationPipeline
from gain.annotation.score_annotator import GenomicScoreAnnotatorBase
from gain.genomic_resources.genomic_scores import (
build_score_from_resource,
)
from gain.genomic_resources.histogram import (
CategoricalHistogram,
Histogram,
NullHistogram,
NumberHistogram,
)
logger = logging.getLogger(__name__)
@dataclass
class ScoreDesc:
    """Description of a single genomic score together with its histogram."""

    # Identification of the score.
    name: str
    resource_id: str
    source: str
    # Histogram object; must provide ``to_dict()`` for serialization.
    hist: Histogram
    # Free-text documentation of the score.
    description: str
    help: str
    # Optional texts describing small and large score values.
    small_values_desc: str | None
    large_values_desc: str | None

    def to_json(self) -> dict[str, Any]:
        """Return this description as a plain dictionary.

        The histogram is converted through its own ``to_dict`` method; all
        other fields are copied unchanged under their attribute names.
        """
        serialized_hist = self.hist.to_dict()
        return dict(
            name=self.name,
            resource_id=self.resource_id,
            source=self.source,
            hist=serialized_hist,
            description=self.description,
            help=self.help,
            small_values_desc=self.small_values_desc,
            large_values_desc=self.large_values_desc,
        )

    @staticmethod
    def from_json(data: dict[str, Any]) -> ScoreDesc:
        """Build a ScoreDesc from its dictionary (JSON) representation.

        The histogram class is selected by ``data["hist"]["config"]["type"]``,
        which must be ``"categorical"``, ``"null"`` or ``"number"``.

        Raises:
            ValueError: if the histogram type is not one of the known types.
            KeyError: if a required key is missing from ``data``.
        """
        assert "hist" in data
        raw_hist = data["hist"]
        kind = raw_hist["config"]["type"]

        histogram: Histogram
        if kind == "number":
            histogram = NumberHistogram.from_dict(raw_hist)
        elif kind == "null":
            histogram = NullHistogram.from_dict(raw_hist)
        elif kind == "categorical":
            histogram = CategoricalHistogram.from_dict(raw_hist)
        else:
            raise ValueError(f"Unknown histogram type {kind}")

        # The two value descriptions are optional and default to None.
        return ScoreDesc(
            name=data["name"],
            resource_id=data["resource_id"],
            source=data["source"],
            hist=histogram,
            description=data["description"],
            help=data["help"],
            small_values_desc=data.get("small_values_desc"),
            large_values_desc=data.get("large_values_desc"),
        )
class GenomicScoresRegistry:
    """Genomic scores registry allowing access to genomic scores histograms.

    Maps a score identifier to its ``ScoreDesc`` and supports ``in``,
    ``len()`` and indexing by score identifier.
    """

    def __init__(self, scores: dict[str, ScoreDesc]):
        """Create a registry holding a copy of the ``scores`` mapping."""
        # Copy the mapping so the registry does not share the caller's dict.
        self.scores: dict[str, ScoreDesc] = dict(scores)
        logger.info(
            "genomic scores histograms loaded: %s", list(self.scores))

    @staticmethod
    def build_genomic_scores_registry(
        pipeline: AnnotationPipeline,
    ) -> GenomicScoresRegistry:
        """Build a genomic scores registry from an annotation pipeline.

        Only annotators whose info type is ``position_score``, ``np_score``
        or ``allele_score`` contribute scores. A ``None`` pipeline, or a
        pipeline without annotators, yields an empty registry.
        """
        score_types = {"position_score", "np_score", "allele_score"}
        annotators = pipeline.annotators if pipeline is not None else []

        # First select the score annotators, then build their descriptions.
        score_annotators: list[GenomicScoreAnnotatorBase] = [
            cast(GenomicScoreAnnotatorBase, candidate)
            for candidate in annotators
            if candidate.get_info().type in score_types
        ]

        annotation_scores: dict[str, ScoreDesc] = {}
        for score_annotator in score_annotators:
            # Later annotators override earlier ones on attribute name clash.
            annotation_scores.update(
                GenomicScoresRegistry._build_annotator_scores_desc(
                    score_annotator))
        return GenomicScoresRegistry(annotation_scores)

    @staticmethod
    def _build_annotator_scores_desc(
        annotator: GenomicScoreAnnotatorBase,
    ) -> dict[str, ScoreDesc]:
        """Build score descriptions for the attributes of one annotator.

        Internal attributes are skipped. Attributes whose histogram is
        ``None`` are skipped with a warning. The result is keyed by
        attribute name.
        """
        annotator_info = annotator.get_info()
        # Only the first resource of the annotator is used to build the score.
        resource = annotator_info.resources[0]
        score = build_score_from_resource(resource)

        result: dict[str, ScoreDesc] = {}
        for attr in annotator_info.attributes:
            if attr.internal:
                continue
            score_def = score.score_definitions[attr.source]
            help_doc = annotator.build_attribute_help(attr)
            hist = score.get_score_histogram(attr.source)
            if hist is None:
                # Without a histogram there is nothing to expose.
                logger.warning(
                    "unable to load histogram for %s: %s (%s)",
                    score.resource_id, attr.name, attr.source)
                continue
            result[attr.name] = ScoreDesc(
                attr.name,
                resource.resource_id,
                attr.source,
                hist,
                f"{attr.name} - {attr.description}",
                help_doc,
                score_def.small_values_desc,
                score_def.large_values_desc,
            )
        return result

    def get_scores(self) -> list[tuple[str, ScoreDesc]]:
        """Return all genomic scores whose histogram is not a null histogram.

        Each item is a ``(score_id, score_desc)`` pair, in registry order.
        """
        return [
            (score_id, score)
            for score_id, score in self.scores.items()
            if score.hist.type != "null_histogram"
        ]

    def __getitem__(self, score_id: str) -> ScoreDesc:
        """Return the description registered under ``score_id``.

        Raises:
            KeyError: if ``score_id`` is not registered; the exception
                carries the missing identifier.
        """
        # The dict lookup raises KeyError(score_id), naming the missing key.
        return self.scores[score_id]

    def __contains__(self, score_id: str) -> bool:
        """Return True if ``score_id`` is registered."""
        return score_id in self.scores

    def __len__(self) -> int:
        """Return the number of registered scores."""
        return len(self.scores)