Skip to content
Snippets Groups Projects
Commit bdd0ee5d authored by Hugo Madge Leon's avatar Hugo Madge Leon
Browse files

fixes to imports

parent 551c2056
No related branches found
No related tags found
1 merge request!46Merged Tanya's files + final lint check
......@@ -60,4 +60,5 @@ __pycache__/
*_cache
*egg-info/
.coverage
build/
\ No newline at end of file
build/
*play.py
\ No newline at end of file
......@@ -26,13 +26,10 @@ def main(args: argparse.Namespace):
fasta, seq_counts = file_validation(args.fasta, args.counts, args.sep)
logger.info("Fragmentation of %s...", args.fasta)
fasta_parse = {}
for record in fasta:
fasta_parse[record.id] = record.seq
splits = np.arange(0, len(list(fasta_parse))+args.size, args.size)
splits = np.arange(0, len(list(fasta))+args.size, args.size)
for i, split in enumerate(splits):
fasta_dict = fasta_parse[split:splits[i+1]]
fasta_dict = fasta[split:splits[i+1]]
term_frags = fragmentation(fasta_dict, seq_counts,
args.mean, args.std,
args.a_prob, args.t_prob,
......@@ -55,14 +52,17 @@ def file_validation(fasta_file: str,
sep (str): Separator for counts file.
Returns:
tuple: fasta and sequence counts variables
tuple: fasta dict and sequence counts pd.DataFrame
"""
with open(fasta_file, "r", encoding="utf-8") as handle:
fasta = SeqIO.parse(handle, "fasta")
if not any(fasta):
fasta_sequences = SeqIO.parse(open(fasta_file, "r", encoding="utf-8"),'fasta')
if not any(fasta_sequences):
raise ValueError("Input FASTA file is either empty or \
incorrect file type.")
fasta_dict = {}
for record in fasta_sequences:
fasta_dict[record.id] = str(record.seq).upper()
count_path = Path(counts_file)
if not count_path.is_file():
logger.exception("Input counts file does not exist or isn't a file.")
......@@ -72,7 +72,7 @@ def file_validation(fasta_file: str,
else:
seq_counts = pd.read_table(counts_file, names=["seqID", "count"])
return fasta, seq_counts
return fasta_dict, seq_counts
def parse_arguments() -> argparse.Namespace:
......
"""Test cli.py functions."""
import pytest
import pandas as pd
# from Bio import SeqIO
from term_frag_sel.cli import file_validation # type: ignore
FASTA_FILE = "tests/test_files/test.fasta"
CSV_FILE = "tests/test_files/test.csv"
def test_file():
"""Test check_positive function."""
"""Test file_validation function."""
assert isinstance(file_validation(FASTA_FILE, CSV_FILE, ","), tuple[dict, pd.DataFrame])
with pytest.raises(FileNotFoundError):
file_validation("", "", ",")
,seqID,count
0,1,120
1,2,60
2,3,76
3,4,141
4,5,107
5,6,59
6,7,121
7,8,72
8,9,114
9,10,126
10,11,77
11,12,130
12,13,146
13,14,69
14,15,62
15,16,93
16,17,103
17,18,150
18,19,140
19,20,56
20,21,103
21,22,115
22,23,113
23,24,53
24,25,95
25,26,112
26,27,102
27,28,53
28,29,139
29,30,143
30,31,150
31,32,126
32,33,50
33,34,94
34,35,81
35,36,116
36,37,51
37,38,110
38,39,98
39,40,60
40,41,57
41,42,73
42,43,143
43,44,116
44,45,98
45,46,139
46,47,89
47,48,63
48,49,68
49,50,84
"""Test utils.py functions."""
import random
import pandas as pd
import pytest
from Bio import SeqIO
from term_frag_sel.fragmentation import fragmentation, get_cut_number # type: ignore
with open("tests/test_files/test.fasta", "r", encoding="utf-8") as handle:
fasta = SeqIO.parse(handle, "fasta")
FASTA_FILE = "tests/test_files/test.fasta"
CSV_FILE = "tests/test_files/test.csv"
seq_counts = {}
for entry in fasta:
name, sequence = entry.id, str(entry.seq)
seq_counts[name] = random.randint(50,150)
fasta_sequences = SeqIO.parse(open(FASTA_FILE, "r", encoding="utf-8"),'fasta')
fasta_dict = {}
for record in fasta_sequences:
fasta_dict[record.id] = str(record.seq).upper()
seq_counts = pd.read_csv(CSV_FILE, names=["seqID", "count"])
MU = 100
STD = 10
......@@ -35,23 +37,23 @@ def test_get_cut_number():
def test_fragmentation():
"""Test fragmentation function."""
assert isinstance(fragmentation(fasta, seq_counts), list)
assert isinstance(fragmentation(fasta_dict, seq_counts), list)
with pytest.raises(ValueError):
fragmentation(fasta, seq_counts, mean_length = 'a', std = 10,
fragmentation(fasta_dict, seq_counts, mean_length = 'a', std = 10,
a_prob = 0.22, t_prob = 0.25,g_prob = 0.25, c_prob = 0.28)
with pytest.raises(ValueError):
fragmentation(fasta, seq_counts, mean_length = 100, std = 'a',
fragmentation(fasta_dict, seq_counts, mean_length = 100, std = 'a',
a_prob = 0.22, t_prob = 0.25,g_prob = 0.25, c_prob = 0.28)
with pytest.raises(ValueError):
fragmentation(fasta, seq_counts, mean_length = 100, std = 10,
a_prob = 'a', t_prob = 0.25,g_prob = 0.25, c_prob = 0.28)
fragmentation(fasta_dict, seq_counts, mean_length = 100, std = 10,
a_prob = 'a', t_prob = 0.25, g_prob = 0.25, c_prob = 0.28)
with pytest.raises(ValueError):
fragmentation(fasta, seq_counts, mean_length = 100, std = 10,
a_prob = 0.22, t_prob = 'a',g_prob = 0.25, c_prob = 0.28)
fragmentation(fasta_dict, seq_counts, mean_length = 100, std = 10,
a_prob = 0.22, t_prob = 'a', g_prob = 0.25, c_prob = 0.28)
with pytest.raises(ValueError):
fragmentation(fasta, seq_counts, mean_length = 100, std = 10,
a_prob = 0.22, t_prob = 0.25,g_prob = 'a', c_prob = 0.28)
fragmentation(fasta_dict, seq_counts, mean_length = 100, std = 10,
a_prob = 0.22, t_prob = 0.25, g_prob = 'a', c_prob = 0.28)
with pytest.raises(ValueError):
fragmentation(fasta, seq_counts, mean_length = 100, std = 10,
a_prob = 0.22, t_prob = 0.25,g_prob = 0.25, c_prob = 'a')
fragmentation(fasta_dict, seq_counts, mean_length = 100, std = 10,
a_prob = 0.22, t_prob = 0.25, g_prob = 0.25, c_prob = 'a')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment