Skip to content
Snippets Groups Projects
Commit 444cc961 authored by MihaelaZavolan's avatar MihaelaZavolan Committed by Alex Kanitz
Browse files

feat: function to generate poly(A) tail sequence

parent e8f8acc9
No related branches found
No related tags found
1 merge request: !8 feat: function to generate poly(A) tail sequence
Pipeline #13587 passed
coverage
flake8
flake8-docstrings
pytest
mypy
coverage
pandas
\ No newline at end of file
pandas
pytest
"""Generate a poly(A) tail."""
from random import choices
from typing import (List, Tuple)
def generate_poly_a(
    length: int = 100,
    weights: Tuple[float, float, float, float] = (
        0.914, 0.028, 0.025, 0.033
    )
) -> str:
    """Generate a poly(A) tail of specified length and composition.

    This function generates a nucleotide sequence that has compositional
    statistics resembling those of poly(A) tails.

    Args:
        length: Length of the desired tail.
        weights: Tuple of relative `A`, `C`, `G` and `U` frequencies in
            the tail. The values do not need to sum to 1; they are
            normalized before sampling.

    Returns:
        The generated poly(A) tail.

    Raises:
        ValueError: The provided length is not an `int` or is outside
            of the accepted range (1-200).
        ValueError: The number of `weights` does not match the number of
            bases, one or more of the `weights` are not numbers or are
            negative, or all weights are zero.
    """
    max_len: int = 200
    bases: Tuple[str, str, str, str] = ('A', 'C', 'G', 'U')

    # check parameters
    if not isinstance(length, int):
        raise ValueError(
            f"The provided length is not an integer: {length}"
        )
    if not 1 <= length <= max_len:
        raise ValueError(
            "The provided length is outside of the accepted range "
            f"(1-{max_len}): {length}"
        )
    if len(weights) != len(bases):
        raise ValueError(
            "There is not a weight provided for each of the bases "
            f"'{bases}': {weights}"
        )
    # summing is used as a cheap probe for non-numeric entries
    try:
        total = sum(weights)
    except TypeError as exc:
        raise ValueError(
            "At least one of the provided weights is not a number: "
            f"{weights}"
        ) from exc
    if any(w < 0 for w in weights):
        raise ValueError(
            f"At least one of the provided weights is negative: {weights}"
        )
    if all(w == 0 for w in weights):
        raise ValueError(f"All weights are zero: {weights}")

    # ensure that the values are normalized
    s: float = float(total)
    norm_weights: List[float] = [freq / s for freq in weights]
    tail_bases: List[str] = choices(bases, weights=norm_weights, k=length)
    return "".join(tail_bases)
"""Tests for poly_a module."""
import pytest
from src.poly_a import generate_poly_a
class TestGeneratePolyA:
    """Test suite for `generate_poly_a()`."""

    def test_passes_default_args(self):
        """A call without arguments returns a string of length 100."""
        tail = generate_poly_a()
        assert isinstance(tail, str)
        assert len(tail) == 100

    def test_passes_set_all_args(self):
        """With all weight on `A`, the tail consists of `A` only."""
        tail = generate_poly_a(length=10, weights=(1, 0, 0, 0))
        assert isinstance(tail, str)
        assert len(tail) == 10
        assert tail == 'A' * len(tail)

    @pytest.mark.parametrize(
        "length, expected",
        [
            ('a', ValueError),
            (-1, ValueError),
            (0, ValueError),
            (250, ValueError),
        ],
    )
    def test_wrong_length(self, expected, length):
        """Non-integer and out-of-range lengths raise the expected error."""
        with pytest.raises(expected):
            generate_poly_a(length=length)

    @pytest.mark.parametrize(
        "weights, expected",
        [
            ((0, 0, 1), ValueError),
            (('a', 0, 0, 1), ValueError),
            ((0, 0, 0, -1), ValueError),
            ((0, 0, 0, 0), ValueError),
        ],
    )
    def test_wrong_weights(self, expected, weights):
        """Too few, non-numeric, negative or all-zero weights raise."""
        with pytest.raises(expected):
            generate_poly_a(weights=weights)
"""Tests for module in root package."""
from re import match
from src import __version__
def test_version():
    """Assert that version starts with a `MAJOR.MINOR.PATCH` number triple.

    Each component may span several digits (e.g., `1.10.0`), as permitted
    by semantic versioning. Only the start of the string is checked, so
    any trailing suffix is not validated here.
    """
    assert match(r'\d+\.\d+\.\d+', __version__)
"""Placeholder test for pipeline."""
import pytest
import src
import re
from src import sampleinput as si
import pandas as pd
def test_version():
"""Assert that version matches semantic versioning format."""
from src.sampleinput import sample_from_input
assert re.match(r'\d\.\d\.\d', src.__version__)
def test_sampleinput(tmpdir):
"""Tests the output, input file name and separator."""
si.sample_from_input(
sample_from_input(
input_file='./tests/resources/Transcript2.tsv',
output_file=tmpdir / 'test1.csv',
sep='\t',
......@@ -23,6 +17,6 @@ def test_sampleinput(tmpdir):
t1=pd.read_table(tmpdir / 'test1.csv', header=None, sep=',')
assert t1[1].sum()==142958
with pytest.raises(IndexError):
si.sample_from_input(input_file='./tests/resources/Transcript2.tsv')
sample_from_input(input_file='./tests/resources/Transcript2.tsv')
with pytest.raises(IOError):
si.sample_from_input(input_file='file_not_existing.txt')
\ No newline at end of file
sample_from_input(input_file='file_not_existing.txt')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment