gain.gene_sets package

Subpackages

Submodules

gain.gene_sets.gene_set module

Classes for handling of gene sets and gene set collections.

class gain.gene_sets.gene_set.BaseGeneSetCollection(collection_id: str)[source]

Bases: ABC

Base class for gene set collections.

abstractmethod get_all_gene_sets() list[GeneSet][source]

Return list of all gene sets in the collection.

abstractmethod get_gene_set(gene_set_id: str) GeneSet | None[source]

Return the gene set if found; returns None if not found.

abstractmethod load() BaseGeneSetCollection[source]

Load the gene sets from the resource.

class gain.gene_sets.gene_set.BaseResourceSchema(*, type: str | None = None, meta: MetaSchema | None = None)[source]

Bases: BaseModel

meta: MetaSchema | None
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {}

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[dict[str, FieldInfo]] = {'meta': FieldInfo(annotation=Union[MetaSchema, NoneType], required=False, default=None), 'type': FieldInfo(annotation=Union[str, NoneType], required=False, default=None)}

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

type: str | None
class gain.gene_sets.gene_set.CategoricalHistogramSchema(*, type: Literal['categorical'], displayed_values_count: int | None = None, displayed_values_percent: float | None = None, value_order: list[str | int] | None = None, y_log_scale: bool | None = None, label_rotation: int | None = None, plot_function: str | None = None, enforce_type: bool | None = None, natural_order: bool | None = None)[source]

Bases: BaseModel

displayed_values_count: int | None
displayed_values_percent: float | None
enforce_type: bool | None
label_rotation: int | None
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {}

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[dict[str, FieldInfo]] = {'displayed_values_count': FieldInfo(annotation=Union[int, NoneType], required=False, default=None), 'displayed_values_percent': FieldInfo(annotation=Union[float, NoneType], required=False, default=None), 'enforce_type': FieldInfo(annotation=Union[bool, NoneType], required=False, default=None), 'label_rotation': FieldInfo(annotation=Union[int, NoneType], required=False, default=None), 'natural_order': FieldInfo(annotation=Union[bool, NoneType], required=False, default=None), 'plot_function': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'type': FieldInfo(annotation=Literal['categorical'], required=True), 'value_order': FieldInfo(annotation=Union[list[Union[str, int]], NoneType], required=False, default=None), 'y_log_scale': FieldInfo(annotation=Union[bool, NoneType], required=False, default=None)}

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

natural_order: bool | None
plot_function: str | None
type: Literal['categorical']
value_order: list[str | int] | None
y_log_scale: bool | None
class gain.gene_sets.gene_set.GeneSet(name: str, desc: str, syms: list[str])[source]

Bases: object

Class representing a set of genes.

count: int
desc: str
name: str
syms: list[str]
class gain.gene_sets.gene_set.GeneSetCollection(resource: GenomicResource)[source]

Bases: BaseGeneSetCollection

Class representing a collection of gene sets in a resource.

property files: set[str]

Return the set of resource files the implementation utilises.

get_all_gene_sets() list[GeneSet][source]

Return list of all gene sets in the collection.

get_gene_collection_count_statistics() dict | None[source]

Get gene collection count statistics from the resource.

get_gene_set(gene_set_id: str) GeneSet | None[source]

Return the gene set if found; returns None if not found.

get_gene_sets_list_statistics() list[dict] | None[source]

Get gene sets list statistics from the resource.

get_gene_sets_per_gene_hist() NullHistogram | CategoricalHistogram | NumberHistogram | None[source]
get_gene_sets_per_gene_hist_filename() str[source]
get_gene_sets_per_gene_hist_image_filename() str[source]
get_genes_per_gene_set_hist() NullHistogram | CategoricalHistogram | NumberHistogram | None[source]
get_genes_per_gene_set_hist_filename() str[source]
get_genes_per_gene_set_hist_image_filename() str[source]
is_loaded() bool[source]

Check if the gene sets have been loaded.

load() GeneSetCollection[source]

Load the gene sets from the resource.

load_gene_sets() dict[str, GeneSet][source]

Build a gene set collection from a given GenomicResource.

class gain.gene_sets.gene_set.GeneSetResourceSchema(*, id: str, filename: str | None = None, directory: str | None = None, format: str | None, web_label: str | None = None, web_format_str: str | None = None, histograms: dict[Literal['genes_per_gene_set', 'gene_sets_per_gene'], Annotated[NumericHistogramSchema | CategoricalHistogramSchema, FieldInfo(annotation=NoneType, required=True, discriminator='type')]] | None = None)[source]

Bases: BaseModel

directory: str | None
filename: str | None
histograms: dict[Literal['genes_per_gene_set', 'gene_sets_per_gene'], HistogramConfig] | None
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {}

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[dict[str, FieldInfo]] = {'directory': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'filename': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'histograms': FieldInfo(annotation=Union[dict[Literal['genes_per_gene_set', 'gene_sets_per_gene'], Annotated[Union[NumericHistogramSchema, CategoricalHistogramSchema], FieldInfo(annotation=NoneType, required=True, discriminator='type')]], NoneType], required=False, default=None), 'resource_format': FieldInfo(annotation=Union[str, NoneType], required=True, alias='format', alias_priority=2), 'resource_id': FieldInfo(annotation=str, required=True, alias='id', alias_priority=2), 'web_format_str': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'web_label': FieldInfo(annotation=Union[str, NoneType], required=False, default=None)}

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

resource_format: str | None
resource_id: str
web_format_str: str | None
web_label: str | None
class gain.gene_sets.gene_set.MetaSchema(*, description: str | None = None, labels: dict[str, Any] | None = None)[source]

Bases: BaseModel

description: str | None
labels: dict[str, Any] | None
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {}

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[dict[str, FieldInfo]] = {'description': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'labels': FieldInfo(annotation=Union[dict[str, Any], NoneType], required=False, default=None)}

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

class gain.gene_sets.gene_set.NumericHistogramSchema(*, type: Literal['number'], plot_function: str | None = None, number_of_bins: int | None = None, view_range: ViewRangeSchema | None = None, x_log_scale: bool | None = None, y_log_scale: bool | None = None, x_min_log: float | None = None, value_order: list[str | int] | None = None, displayed_values_count: int | None = None)[source]

Bases: BaseModel

displayed_values_count: int | None
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {}

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[dict[str, FieldInfo]] = {'displayed_values_count': FieldInfo(annotation=Union[int, NoneType], required=False, default=None), 'number_of_bins': FieldInfo(annotation=Union[int, NoneType], required=False, default=None), 'plot_function': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'type': FieldInfo(annotation=Literal['number'], required=True), 'value_order': FieldInfo(annotation=Union[list[Union[str, int]], NoneType], required=False, default=None), 'view_range': FieldInfo(annotation=Union[ViewRangeSchema, NoneType], required=False, default=None), 'x_log_scale': FieldInfo(annotation=Union[bool, NoneType], required=False, default=None), 'x_min_log': FieldInfo(annotation=Union[float, NoneType], required=False, default=None), 'y_log_scale': FieldInfo(annotation=Union[bool, NoneType], required=False, default=None)}

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

number_of_bins: int | None
plot_function: str | None
type: Literal['number']
value_order: list[str | int] | None
view_range: ViewRangeSchema | None
x_log_scale: bool | None
x_min_log: float | None
y_log_scale: bool | None
class gain.gene_sets.gene_set.ViewRangeSchema(*, min: float | None = None, max: float | None = None)[source]

Bases: BaseModel

max: float | None
min: float | None
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {}

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[dict[str, FieldInfo]] = {'max': FieldInfo(annotation=Union[float, NoneType], required=False, default=None), 'min': FieldInfo(annotation=Union[float, NoneType], required=False, default=None)}

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

gain.gene_sets.gene_set.build_gene_set_collection_from_file(filename: str, collection_id: str | None = None, collection_format: str | None = None, web_label: str | None = None, web_format_str: str | None = None) GeneSetCollection[source]

Return a Gene Set Collection by adapting a file to a local resource.

gain.gene_sets.gene_set.build_gene_set_collection_from_resource(resource: GenomicResource) GeneSetCollection[source]

Return a Gene Set Collection built from a resource.

gain.gene_sets.gene_set.build_gene_set_collection_from_resource_id(resource_id: str, grr: GenomicResourceRepo | None = None) GeneSetCollection[source]

gain.gene_sets.gene_term module

class gain.gene_sets.gene_term.GeneInfo(gene_id: str, gene_sym: str, synonyms: set[str], description: str)[source]

Bases: object

description: str
gene_id: str
gene_sym: str
synonyms: set[str]
class gain.gene_sets.gene_term.GeneTerms[source]

Bases: object

Class representing gene terms.

filter_genes(filter_fun: Callable[[list[str]], list[str]]) None[source]

Filter the genes.

rename_genes(gene_ns: str | None, rename_fn: Callable[[str], str | None]) None[source]

Rename genes.

save(fname: str) None[source]

Save to fname.

class gain.gene_sets.gene_term.NCBIGeneInfo(genes: dict[str, gain.gene_sets.gene_term.GeneInfo], ns_tokens: dict[str, dict[str, list[gain.gene_sets.gene_term.GeneInfo]]])[source]

Bases: object

genes: dict[str, GeneInfo]
ns_tokens: dict[str, dict[str, list[GeneInfo]]]
gain.gene_sets.gene_term.dd() dict[str, int][source]
gain.gene_sets.gene_term.get_clean_gene_id(ncbi_gene_info: NCBIGeneInfo, ns: str, term: str) str | None[source]

Get the clean gene ID from NCBI gene info data.

gain.gene_sets.gene_term.load_gene_terms(path: str) GeneTerms | None[source]

Load gene terms from a file.

gain.gene_sets.gene_term.load_ncbi_gene_info(gene_info_file: str) NCBIGeneInfo[source]
gain.gene_sets.gene_term.read_ewa_set_file(set_files: list[IO]) GeneTerms[source]

Read a set of ewa files.

gain.gene_sets.gene_term.read_gmt_file(input_file: IO) GeneTerms[source]

Read a gmt file.

gain.gene_sets.gene_term.read_mapping_file(input_file: IO, names_file: IO | None) GeneTerms[source]

Read a mapping file.

gain.gene_sets.gene_term.rename_gene_terms(gene_terms: GeneTerms, gene_ns: str, ncbi_gene_info: NCBIGeneInfo) GeneTerms[source]

Rename gene terms using NCBI gene info data.

Module contents