Skip to content
Snippets Groups Projects

feat: function to generate poly(A) tail sequence

Merged MihaelaZavolan requested to merge polyAtail into main
Files
5
src/poly_a.py 0 → 100644
+ 67
0
 
"""Generate a poly(A) tail."""
 
 
from random import choices
 
from typing import (List, Tuple)
 
 
 
def generate_poly_a(
    length: int = 100,
    weights: Tuple[float, float, float, float] = (
        0.914, 0.028, 0.025, 0.033
    )
) -> str:
    """Generate a poly(A) tail of specified length and composition.

    This function generates a nucleotide sequence that has compositional
    statistics resembling those of poly(A) tails. Each position is drawn
    independently from the bases `A`, `C`, `G` and `U` according to the
    given relative weights.

    Args:
        length: Length of the desired tail; an `int` between 1 and 200
            (inclusive).
        weights: Tuple of relative `A`, `C`, `G` and `U` frequencies in
            the tail. The values do not need to sum to 1; they are
            normalized internally.

    Returns:
        The generated poly(A) tail.

    Raises:
        ValueError: The provided length is not an `int` or is outside of
            the accepted range (1-200).
        ValueError: The number of `weights` does not match the number of
            bases, one or more of the `weights` are not numbers or are
            negative, or all weights are zero.
    """
    max_len: int = 200
    bases: Tuple[str, str, str, str] = ('A', 'C', 'G', 'U')

    # check parameters
    if not isinstance(length, int):
        raise ValueError(
            f"The provided length is not an integer: {length}"
        )
    if not 1 <= length <= max_len:
        raise ValueError(
            "The provided length is outside of the accepted range "
            f"(1-{max_len}): {length}"
        )
    if len(weights) != len(bases):
        raise ValueError(
            "There is not a weight provided for each of the bases "
            f"'{bases}': {weights}"
        )
    try:
        # summing is the cheapest way to find out whether all weights
        # are numeric; the total is reused for normalization below
        total = sum(weights)
    except TypeError as err:
        raise ValueError(
            f"At least one of the provided weights is not a number: {weights}"
        ) from err
    if any(w < 0 for w in weights):
        raise ValueError(
            f"At least one of the provided weights is negative: {weights}"
        )
    if all(w == 0 for w in weights):
        raise ValueError(f"All weights are zero: {weights}")

    # ensure that the values are normalized
    scale: float = float(total)
    norm_weights: List[float] = [freq / scale for freq in weights]
    tail_bases: List[str] = choices(bases, weights=norm_weights, k=length)
    return "".join(tail_bases)
Loading