Skip to content
Snippets Groups Projects

feat: function to generate poly(A) tail sequence

Merged MihaelaZavolan requested to merge polyAtail into main
Files
5
src/poly_a.py 0 → 100644
+ 67
0
"""Generate a poly(A) tail."""
from random import choices
from typing import (List, Tuple)
def generate_poly_a(
    length: int = 100,
    weights: Tuple[float, float, float, float] = (
        0.914, 0.028, 0.025, 0.033
    )
) -> str:
    """Generate a poly(A) tail of specified length and composition.

    This function generates a nucleotide sequence that has compositional
    statistics resembling those of poly(A) tails. Each position is drawn
    independently from the bases `A`, `C`, `G` and `U` according to the
    provided relative frequencies.

    Args:
        length: Length of the desired tail; must be an `int` between 1
            and 200 (inclusive).
        weights: Tuple of relative `A`, `C`, `G` and `U` frequencies in
            the tail. The values do not need to sum to 1; they are
            normalized internally.

    Returns:
        The generated poly(A) tail.

    Raises:
        ValueError: The provided length is not an `int` or is outside
            of the accepted range (1-200).
        ValueError: The number of provided `weights` does not match the
            number of bases, at least one weight is not a number, at
            least one weight is negative, or all weights are zero.
    """
    max_len: int = 200
    bases: Tuple[str, str, str, str] = ('A', 'C', 'G', 'U')

    # check parameters
    if not isinstance(length, int):
        raise ValueError(
            f"The provided length is not an integer: {length}"
        )
    if not 1 <= length <= max_len:
        raise ValueError(
            "The provided length is outside of the accepted range "
            f"(1-{max_len}): {length}"
        )
    if len(weights) != len(bases):
        raise ValueError(
            f"There is not a weight provided for each of the bases '{bases}': "
            f"{weights}"
        )
    # summing doubles as a check that all weights are numeric; the total is
    # reused below for normalization
    try:
        total = sum(weights)
    except TypeError as err:
        raise ValueError(
            f"At least one of the provided weights is not a number: {weights}"
        ) from err
    if any(w < 0 for w in weights):
        raise ValueError(
            f"At least one of the provided weights is negative: {weights}"
        )
    if all(w == 0 for w in weights):
        raise ValueError(f"All weights are zero: {weights}")

    # ensure that the values are normalized
    s: float = float(total)
    norm_weights: List[float] = [freq / s for freq in weights]

    tail_bases: List[str] = choices(bases, weights=norm_weights, k=length)
    return "".join(tail_bases)
Loading