dae.genomic_resources.genomic_position_table package

Subpackages

Submodules

dae.genomic_resources.genomic_position_table.line module

class dae.genomic_resources.genomic_position_table.line.BigWigLine(raw_line: tuple)[source]

Bases: object

Represents a line read from a bigWig file.

get(key: str | int) str | int | float[source]
row() tuple[source]
class dae.genomic_resources.genomic_position_table.line.Line(raw_line: tuple, chrom_key: int = 0, pos_begin_key: int = 1, pos_end_key: int = 2, ref_key: int | None = None, alt_key: int | None = None)[source]

Bases: object

Represents a line read from a genomic position table.

Provides attribute access to a number of important columns - chromosome, start position, end position, reference allele and alternative allele.

alt: str | None
chrom: str
fchrom: str
get(key: str | int) str[source]
pos_begin: int
pos_end: int
ref: str | None
row() tuple[source]
class dae.genomic_resources.genomic_position_table.line.LineBase(*args, **kwargs)[source]

Bases: Protocol

Protocol for genomic position table lines.

alt: str | None
chrom: str
fchrom: str
get(key: str | int) Any[source]
pos_begin: int
pos_end: int
ref: str | None
row() tuple[source]
class dae.genomic_resources.genomic_position_table.line.LineBuffer[source]

Bases: object

Represent a line buffer for Tabix genome position table.

append(line: LineBase) None[source]
clear() None[source]
contains(chrom: str, pos: int) bool[source]
fetch(chrom: str, pos_begin: int, pos_end: int) Generator[LineBase, None, None][source]

Return a generator of rows matching the region.

find_index(chrom: str, pos: int) int[source]

Find index in line buffer that contains the passed position.

peek_first() LineBase[source]
peek_last() LineBase[source]
pop_first() LineBase[source]
prune(chrom: str, pos: int) None[source]

Prune the buffer if needed.

region() tuple[str | None, int | None, int | None][source]

Return region stored in the buffer.

class dae.genomic_resources.genomic_position_table.line.VCFLine(raw_line: VariantRecord, allele_index: int | None)[source]

Bases: object

Line adapter for lines derived from a VCF file.

Implements functionality for handling multi-allelic variants and INFO fields.

get(key: str | int) Any[source]

Get a value from the INFO field of the VCF line.

row() tuple[source]

dae.genomic_resources.genomic_position_table.table module

class dae.genomic_resources.genomic_position_table.table.GenomicPositionTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: ABC

Abstraction over genomic scores table.

ALT = 'alternative'
CHROM = 'chrom'
POS_BEGIN = 'pos_begin'
POS_END = 'pos_end'
REF = 'reference'
abstract close() None[source]

Close the resource.

abstract get_all_records() Generator[LineBase, None, None][source]

Return generator of all records in the table.

abstract get_chromosome_length(chrom: str, step: int = 100000000) int[source]

Return the length of a chromosome (or contig).

The returned value is guaranteed to be larger than the actual contig length.

get_chromosomes() list[str][source]

Return list of contigs in the genomic position table.

get_column_key(col: str) int | None[source]

Find the index of a column in the table.

abstract get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

abstract get_records_in_region(chrom: str | None = None, pos_begin: int | None = None, pos_end: int | None = None) Generator[LineBase, None, None][source]

Return an iterable over the records in the specified range.

The interval is closed on both sides and 1-based.

map_chromosome(chromosome: str) str | None[source]

Map a chromosome from reference genome to file chromosome.

abstract open() GenomicPositionTable[source]
unmap_chromosome(chromosome: str) str | None[source]

Map a chromosome from file contigs to the reference genome chromosome.

dae.genomic_resources.genomic_position_table.table.adjust_zero_based_line(line: Line) Line[source]

Adjust a zero-based line.

dae.genomic_resources.genomic_position_table.table.get_idx(key: str | int, header: tuple | None) int[source]
dae.genomic_resources.genomic_position_table.table.zero_based_adjust(raw: tuple, pos_begin_key: str | int, pos_end_key: str | int, header: tuple | None) tuple[source]

Adjust a zero-based record.

dae.genomic_resources.genomic_position_table.table_bigwig module

class dae.genomic_resources.genomic_position_table.table_bigwig.BigWigTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: GenomicPositionTable

bigWig format implementation of the genomic position table.

close() None[source]

Close the resource.

get_all_records() Generator[BigWigLine, None, None][source]

Return generator of all records in the table.

get_chromosome_length(chrom: str, step: int = 100000000) int[source]

Return the length of a chromosome (or contig).

The returned value is guaranteed to be larger than the actual contig length.

get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_records_in_region(chrom: str | None = None, pos_begin: int | None = None, pos_end: int | None = None) Generator[BigWigLine, None, None][source]

Return an iterable over the records in the specified range.

The interval is closed on both sides and 1-based.

open() BigWigTable[source]

dae.genomic_resources.genomic_position_table.table_inmemory module

class dae.genomic_resources.genomic_position_table.table_inmemory.InmemoryGenomicPositionTable(genomic_resource: GenomicResource, table_definition: dict, file_format: str)[source]

Bases: GenomicPositionTable

In-memory genomic position table.

FORMAT_DEF: ClassVar[dict] = {'csv': (',', '\n\r', False), 'mem': (None, ' \t\n\r', True), 'tsv': ('\t', '\n\r', False)}
close() None[source]

Close the resource.

get_all_records() Generator[LineBase, None, None][source]

Return generator of all records in the table.

get_chromosome_length(chrom: str, step: int = 0) int[source]

Return the length of a chromosome (or contig).

The returned value is guaranteed to be larger than the actual contig length.

get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_records_in_region(chrom: str | None = None, pos_begin: int | None = None, pos_end: int | None = None) Generator[LineBase, None, None][source]

Return an iterable over the records in the specified range.

The interval is closed on both sides and 1-based.

open() InmemoryGenomicPositionTable[source]

dae.genomic_resources.genomic_position_table.table_tabix module

class dae.genomic_resources.genomic_position_table.table_tabix.TabixGenomicPositionTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: GenomicPositionTable

Represents Tabix file genome position table.

BUFFER_MAXSIZE = 20000
close() None[source]

Close the resource.

get_all_records() Generator[LineBase, None, None][source]

Return generator of all records in the table.

get_chromosome_length(chrom: str, step: int = 100000000) int[source]

Return the length of a chromosome (or contig).

The returned value is guaranteed to be larger than the actual contig length.

get_chromosomes() list[str][source]

Return list of contigs in the genomic position table.

get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_line_iterator(chrom: str | None = None, pos_begin: int | None = None) Generator[LineBase | None, None, None][source]

Extract raw lines and wrap them in our Line adapter.

get_records_in_region(chrom: str | None = None, pos_begin: int | None = None, pos_end: int | None = None) Generator[LineBase, None, None][source]

Return an iterable over the records in the specified range.

The interval is closed on both sides and 1-based.

open() TabixGenomicPositionTable[source]

dae.genomic_resources.genomic_position_table.table_vcf module

class dae.genomic_resources.genomic_position_table.table_vcf.VCFGenomicPositionTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: TabixGenomicPositionTable

Represents a VCF file genome position table.

CHROM = 'CHROM'
POS_BEGIN = 'POS'
POS_END = 'POS'
get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_line_iterator(chrom: str | None = None, pos_begin: int | None = None) Generator[VCFLine | None, None, None][source]

Extract raw lines and wrap them in our Line adapter.

open() VCFGenomicPositionTable[source]

dae.genomic_resources.genomic_position_table.utils module

dae.genomic_resources.genomic_position_table.utils.build_genomic_position_table(resource: GenomicResource, table_definition: dict) GenomicPositionTable[source]

Instantiate a genome position table from a genomic resource.

dae.genomic_resources.genomic_position_table.utils.save_as_tabix_table(table: GenomicPositionTable, full_file_path: str) None[source]

Save a genome position table as Tabix table.

Module contents

class dae.genomic_resources.genomic_position_table.BigWigLine(raw_line: tuple)[source]

Bases: object

Represents a line read from a bigWig file.

get(key: str | int) str | int | float[source]
row() tuple[source]
class dae.genomic_resources.genomic_position_table.BigWigTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: GenomicPositionTable

bigWig format implementation of the genomic position table.

close() None[source]

Close the resource.

get_all_records() Generator[BigWigLine, None, None][source]

Return generator of all records in the table.

get_chromosome_length(chrom: str, step: int = 100000000) int[source]

Return the length of a chromosome (or contig).

The returned value is guaranteed to be larger than the actual contig length.

get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_records_in_region(chrom: str | None = None, pos_begin: int | None = None, pos_end: int | None = None) Generator[BigWigLine, None, None][source]

Return an iterable over the records in the specified range.

The interval is closed on both sides and 1-based.

open() BigWigTable[source]
class dae.genomic_resources.genomic_position_table.Line(raw_line: tuple, chrom_key: int = 0, pos_begin_key: int = 1, pos_end_key: int = 2, ref_key: int | None = None, alt_key: int | None = None)[source]

Bases: object

Represents a line read from a genomic position table.

Provides attribute access to a number of important columns - chromosome, start position, end position, reference allele and alternative allele.

alt: str | None
chrom: str
fchrom: str
get(key: str | int) str[source]
pos_begin: int
pos_end: int
ref: str | None
row() tuple[source]
class dae.genomic_resources.genomic_position_table.LineBuffer[source]

Bases: object

Represent a line buffer for Tabix genome position table.

append(line: LineBase) None[source]
clear() None[source]
contains(chrom: str, pos: int) bool[source]
fetch(chrom: str, pos_begin: int, pos_end: int) Generator[LineBase, None, None][source]

Return a generator of rows matching the region.

find_index(chrom: str, pos: int) int[source]

Find index in line buffer that contains the passed position.

peek_first() LineBase[source]
peek_last() LineBase[source]
pop_first() LineBase[source]
prune(chrom: str, pos: int) None[source]

Prune the buffer if needed.

region() tuple[str | None, int | None, int | None][source]

Return region stored in the buffer.

class dae.genomic_resources.genomic_position_table.TabixGenomicPositionTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: GenomicPositionTable

Represents Tabix file genome position table.

BUFFER_MAXSIZE = 20000
alt_key: int | None
chrom_key: int
chrom_map: dict[str, str] | None
chrom_order: list[str] | None
close() None[source]

Close the resource.

get_all_records() Generator[LineBase, None, None][source]

Return generator of all records in the table.

get_chromosome_length(chrom: str, step: int = 100000000) int[source]

Return the length of a chromosome (or contig).

The returned value is guaranteed to be larger than the actual contig length.

get_chromosomes() list[str][source]

Return list of contigs in the genomic position table.

get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_line_iterator(chrom: str | None = None, pos_begin: int | None = None) Generator[LineBase | None, None, None][source]

Extract raw lines and wrap them in our Line adapter.

get_records_in_region(chrom: str | None = None, pos_begin: int | None = None, pos_end: int | None = None) Generator[LineBase, None, None][source]

Return an iterable over the records in the specified range.

The interval is closed on both sides and 1-based.

header: Any
jump_threshold: int
line_iterator: Generator[LineBase | None, None, None] | None
open() TabixGenomicPositionTable[source]
pos_begin_key: int
pos_end_key: int
pysam_file: TabixFile | VariantFile | None
ref_key: int | None
rev_chrom_map: dict[str, str] | None
stats: Counter
class dae.genomic_resources.genomic_position_table.VCFGenomicPositionTable(genomic_resource: GenomicResource, table_definition: dict)[source]

Bases: TabixGenomicPositionTable

Represents a VCF file genome position table.

CHROM = 'CHROM'
POS_BEGIN = 'POS'
POS_END = 'POS'
alt_key: int | None
chrom_key: int
chrom_map: dict[str, str] | None
chrom_order: list[str] | None
get_file_chromosomes() list[str][source]

Return chromosomes in a genomic table file.

This is to be overridden by the subclass. It should return a list of the chromosomes in the file in the order determined by the file.

get_line_iterator(chrom: str | None = None, pos_begin: int | None = None) Generator[VCFLine | None, None, None][source]

Extract raw lines and wrap them in our Line adapter.

header: Any
jump_threshold: int
line_iterator: Generator[LineBase | None, None, None] | None
open() VCFGenomicPositionTable[source]
pos_begin_key: int
pos_end_key: int
pysam_file: PysamFile | None
ref_key: int | None
rev_chrom_map: dict[str, str] | None
stats: Counter
class dae.genomic_resources.genomic_position_table.VCFLine(raw_line: VariantRecord, allele_index: int | None)[source]

Bases: object

Line adapter for lines derived from a VCF file.

Implements functionality for handling multi-allelic variants and INFO fields.

get(key: str | int) Any[source]

Get a value from the INFO field of the VCF line.

row() tuple[source]
dae.genomic_resources.genomic_position_table.build_genomic_position_table(resource: GenomicResource, table_definition: dict) GenomicPositionTable[source]

Instantiate a genome position table from a genomic resource.