Source code for gain.genomic_resources.resource_implementation

from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, ClassVar, cast

from cerberus import Validator
from markdown2 import markdown

from gain.task_graph.graph import TaskDesc
from gain.templates import get_template
from gain.utils.helpers import convert_size

from .repository import GenomicResource

logger = logging.getLogger(__name__)


[docs] def get_base_resource_schema() -> dict[str, Any]: return { "type": {"type": "string"}, "meta": { "type": "dict", "allow_unknown": True, "schema": { "description": {"type": "string"}, "labels": {"type": "dict", "nullable": True}, }, }, }
[docs] class ResourceStatistics: """ Base class for statistics. Subclasses should be created using mixins defined for each statistic type that the resource contains. """ def __init__(self, resource_id: str): self.resource_id = resource_id
[docs] @staticmethod def get_statistics_folder() -> str: return "statistics"
[docs] class GenomicResourceImplementation(ABC): """ Base class used by resource implementations. Resources are just a folder on a repository. Resource implementations are classes that know how to use the contents of the resource. """ def __init__(self, genomic_resource: GenomicResource): self.resource = genomic_resource self.config: dict = self.resource.get_config() self._statistics: ResourceStatistics | None = None @property def resource_id(self) -> str: return self.resource.resource_id
[docs] def get_config(self) -> dict: return self.config
@property def files(self) -> set[str]: """Return a list of resource files the implementation utilises.""" return set()
[docs] @abstractmethod def calc_statistics_hash(self) -> bytes: """ Compute the statistics hash. This hash is used to decide whether the resource statistics should be recomputed. """ raise NotImplementedError
[docs] @abstractmethod def create_statistics_build_tasks( self, **kwargs: Any, ) -> list[TaskDesc]: """Create tasks for calculating resource statistics for task graph.""" raise NotImplementedError
[docs] @abstractmethod def calc_info_hash(self) -> bytes: """Compute and return the info hash.""" raise NotImplementedError
[docs] @abstractmethod def get_info(self, **kwargs: Any) -> str: """Construct the contents of the implementation's HTML info page.""" raise NotImplementedError
[docs] @abstractmethod def get_statistics_info(self, **kwargs: Any) -> str: """Construct the contents of the implementation's HTML statistics info page. """ raise NotImplementedError
[docs] def collect_index_info( self, ) -> tuple[tuple[str, ...], tuple[str, ...]]: """Collect resource info for FTS index building. Returns a (header, row) pair where header contains field names and row contains the corresponding values for this resource. Label keys/values are appended after the fixed fields. """ res = self.resource meta = res.get_config().get("meta", {}) or {} labels: dict = res.get_labels() or {} header: tuple[str, ...] = ( "full_id", "id", "type", "description", "summary", *labels.keys(), ) row: tuple[str, ...] = ( res.get_full_id(), res.resource_id, res.get_type(), meta.get("description", "") or "", meta.get("summary", "") or "", *[str(v) for v in labels.values()], ) return header, row
[docs] def get_statistics(self) -> ResourceStatistics | None: """Try and load resource statistics.""" return None
[docs] def reload_statistics(self) -> ResourceStatistics | None: self._statistics = None return self.get_statistics()
[docs] class InfoImplementationMixin: """Mixin that provides generic template info page generation interface."""
[docs] @dataclass class FileEntry: """Provides an entry into manifest object.""" name: str size: str md5: str | None
resource: GenomicResource template_name: ClassVar[str] = "base_implementation.jinja" styles_template_name: ClassVar[str] = "base_implementation_styles.jinja" def _get_template_data(self) -> dict: return {}
[docs] def get_template_data(self) -> dict: """ Return a data dictionary to be used by the template. Will transform the description in the meta section using markdown. """ template_data = self._get_template_data() template_data["resource_files"] = [ self.FileEntry(entry.name, convert_size(entry.size), entry.md5) for entry in self.resource.get_manifest().entries.values() if not entry.name.startswith("statistics") and entry.name != "index.html"] template_data["resource_files"].append( self.FileEntry("statistics/", "", "")) return template_data
[docs] def get_statistics_template_data(self) -> dict: """ Return a data dictionary to be used by the statistics template. Will transform the description in the meta section using markdown. """ template_data = self._get_template_data() template_data["statistic_files"] = [ self.FileEntry( entry.name.removeprefix("statistics/"), convert_size(entry.size), entry.md5, ) for entry in self.resource.get_manifest().entries.values() if entry.name.startswith("statistics")] return template_data
[docs] def get_info(self, **kwargs: Any) -> str: # noqa: ARG002 """Construct the contents of the implementation's HTML info page.""" template_data = self.get_template_data() return get_template(self.template_name).render( resource=self.resource, markdown=markdown, data=template_data, base="resource_template.jinja", styles_template=self.styles_template_name, )
[docs] def get_statistics_info(self, **kwargs: Any) -> str: # noqa: ARG002 """Construct the contents of the implementation's HTML info page.""" template_data = self.get_statistics_template_data() return get_template(self.template_name).render( resource=self.resource, markdown=markdown, data=template_data, base="statistics_template.jinja", styles_template=self.styles_template_name, )
[docs] class ResourceConfigValidationMixin: """Mixin that provides validation of resource configuration."""
[docs] @staticmethod @abstractmethod def get_schema() -> dict: """Return schema to be used for config validation.""" raise NotImplementedError
[docs] @classmethod def validate_and_normalize_schema( cls, config: dict, resource: GenomicResource) -> dict: """Validate the resource schema and return the normalized version.""" # pylint: disable=not-callable validator = Validator(cls.get_schema()) if not validator.validate(config): logger.error( "Resource %s of type %s has an invalid configuration. %s", resource.resource_id, resource.get_type(), validator.errors) raise ValueError(f"Invalid configuration: {resource.resource_id}") return cast(dict, validator.document)