Skip to content
Snippets Groups Projects

feat: function to generate poly(A) tail sequence

Merged MihaelaZavolan requested to merge polyAtail into main
Files
3
+ 62
0
"""Generate a poly(A) tail."""
import random as rn
def mk_polyA(
    length: int = 100,
    a: float = 0.914,
    c: float = 0.028,
    g: float = 0.025,
    u: float = 0.033,
) -> str:
    """Generate a poly(A) tail of specified length and composition.

    This function generates a nucleotide sequence that has
    compositional statistics resembling those of poly(A) tails.
    Each position is drawn independently from 'A', 'C', 'G', 'U'
    with probability proportional to the supplied frequencies; the
    frequencies are normalized internally, so they do not have to
    sum to 1.

    Parameters
    ----------
    length : int
        Length of the desired tail (0 to 200 inclusive; 0 yields an
        empty string). The value is coerced with ``int()``.
    a : float
        Frequency of 'A' nucleotides in the tail
    c : float
        Frequency of 'C' nucleotides in the tail
    g : float
        Frequency of 'G' nucleotides in the tail
    u : float
        Frequency of 'U' nucleotides in the tail

    Returns
    -------
    str
        The generated polyA tail

    Raises
    ------
    ValueError
        Provided length is negative
    ValueError
        Provided length is too large (max allowed = 200)
    ValueError
        One of the provided frequencies is negative or not finite
    ValueError
        The provided frequencies do not add up to a positive,
        finite total (e.g. all of them are zero)
    """
    # Function-scope import so the block does not depend on the
    # module-level import list.
    import math

    max_len = 200

    # Coerce the parameters; int()/float() raise on unparsable input.
    length = int(length)
    a = float(a)
    c = float(c)
    g = float(g)
    u = float(u)

    if length < 0:
        raise ValueError(f'{length} is not a positive integer')
    # Compare against max_len so the check and the message cannot drift.
    if length > max_len:
        raise ValueError(f'{length} is larger than the maximum {max_len}')

    weights = [a, c, g, u]
    if any(w < 0 for w in weights):
        raise ValueError(
            f'One of the base frequencies is negative {a}, {c}, {g}, {u}'
        )
    # NaN compares False against 0, so it needs its own check.
    if not all(math.isfinite(w) for w in weights):
        raise ValueError(
            f'One of the base frequencies is not finite {a}, {c}, {g}, {u}'
        )

    # Guard the normalization: an all-zero (or overflowing) total would
    # otherwise surface as ZeroDivisionError or meaningless weights.
    total = sum(weights)
    if total <= 0 or not math.isfinite(total):
        raise ValueError(
            'The base frequencies must add up to a positive finite '
            f'number {a}, {c}, {g}, {u}'
        )

    # ensure that the values are normalized
    weights = [w / total for w in weights]

    bases = ['A', 'C', 'G', 'U']
    tail_bases = rn.choices(bases, weights=weights, k=length)
    return "".join(tail_bases)
Loading