Source code for GenomeUtils.genome.transcript

#!/usr/bin/env python
"""
Filename: GenomeUtils/genome/transcript.py
Author: Arash Ayat
Copyright: 2025, Alexander Schliep
Version: 0.1.1
Description: This file defines the Transcript class, representing a biological transcript.
License: LGPL-3.0-or-later
"""

from __future__ import annotations

from typing import List, Optional, Tuple, TYPE_CHECKING, Union

from Bio.Seq import Seq

from .genome_element import GenomeElement
from .locus import Locus


if TYPE_CHECKING:
    from .exon import Exon
    from .gene import Gene
    from .genome import Genome

[docs] class Transcript(GenomeElement): """Represents a transcript.""" def __init__(self, id: str, chr: str, start: int, end: int, strand: str, sequence: Seq, gene: "Gene" = None, genome: "Genome" = None, **kwargs): """ Initializes a Transcript object. Args: id: The ID of the transcript. chr: The chromosome identifier (e.g., 'chr1', '1', 'X'). start: The genomic start position of the transcript in chromosome. end: The genomic end position of the transcript in chromosome. strand: The strand in which the transcript is oriented. sequence: The sequence of the transcript. gene: The `Gene` object that the transcript is associated with. Optional, defaults to None. genome: The `Genome` object in which the transcript is located. Optional, defaults to None. kwargs: Additional keyword arguments. """ self._sequence = sequence locus = Locus(chr, start, end, strand) super().__init__(id, locus, gene, genome, **kwargs) @property def sequence(self) -> Seq: return self._sequence @property def exons(self) -> List["Exon"]: return self._children
[docs] def add_exon(self, exon: "Exon"): """Add an `Exon` to the transcript in a sorted manner.""" # Only add if not already present if exon in self._children: return pos = 0 # For '+' strand, sort ascending by start coordinate. # For '-' strand, sort descending by start coordinate (transcriptional order). if self.strand == "+": while pos < len(self._children) and self._children[pos].start < exon.start: pos += 1 else: # self.strand == "-" while pos < len(self._children) and self._children[pos].start > exon.start: pos += 1 self._children.insert(pos, exon) self._genome.is_indexed = False
[docs] def get_gene(self) -> "Gene": """Returns the `Gene` object that this transcript is associated with.""" return self.parent
[docs] def exon_intervals(self) -> List[Tuple[int, int]]: """Get the exon intervals for this transcript.""" return [(exon.start, exon.end) for exon in self.exons]
[docs] def transcript_to_genomic_pos(self, start: int, end: Optional[int] = None) -> Union[Locus, List[Locus], None]: """ Converts a 0-based, half-open transcript coordinate (or range) to a 1-based, inclusive genomic coordinate (or list of Locus objects). Args: start: The 0-based start position on the transcript. end: The optional 0-based end position on the transcript. If None, a single point is converted. If provided, the range is [start, end). Returns: - A Locus object for a single point or for a range within a single exon. - A list of Locus objects if the range spans multiple exons. - None if a single point maps to no location; an empty list for a range. """ is_single_point = end is None if is_single_point: end = start + 1 if not (0 <= start < end <= len(self)): if is_single_point: raise ValueError(f"Transcript position {start} is out of bounds.") else: raise ValueError(f"Transcript positions [{start}, {end}) are out of bounds.") genomic_loci = [] transcript_pos = 0 exons_in_order = self.exons for exon in exons_in_order: exon_len = len(exon) overlap_start = max(start, transcript_pos) overlap_end = min(end, transcript_pos + exon_len) if overlap_start < overlap_end: start_in_exon = overlap_start - transcript_pos end_in_exon = overlap_end - transcript_pos if self.strand == '+': genomic_start = exon.start + start_in_exon genomic_end = exon.start + end_in_exon - 1 else: # self.strand == "-" genomic_end = exon.end - start_in_exon genomic_start = exon.end - (end_in_exon - 1) genomic_loci.append(Locus(self.chr, genomic_start, genomic_end, self.strand)) transcript_pos += exon_len if transcript_pos >= end: break if len(genomic_loci) == 1: return genomic_loci[0] return genomic_loci