Skip to content
Snippets Groups Projects
Commit e1993339 authored by Kathleen Moriarty, committed by Kathleen Moriarty
Browse files

remove: num_seq_cyc parameter

parent 685f8b62
Branches
No related tags found
1 merge request: !17 (Issue 7)
......@@ -10,7 +10,6 @@ def read_sequencing(
output_file_name,
num_reads,
read_len,
num_seq_cyc,
) -> None:
"""Reads a fasta-formatted file of terminal fragments and simulates reads.
......@@ -29,7 +28,6 @@ def read_sequencing(
output_file_name (string): file name where to store the output
num_reads: number of total reads to simulate
read_len: integer of identical read length
num_seq_cyc: integer of number of cycles
"""
# Import classes
from random import choices, randrange
......@@ -38,7 +36,6 @@ def read_sequencing(
# Read data from terminal fragment file
# Store fragments in a list
f = open(frag_file_name, "r")
frag_line = f.readline()
frag_list = [] # type: List[str]
......@@ -68,42 +65,39 @@ def read_sequencing(
# Calculate sum of all lengths to determine the relative abundance for that fragment
sum_frags = sum(map(len, frag_list))
# Repeat the read process for given number of cycles
for j in range(0, num_seq_cyc):
# Loop through fasta fragments that start with 5'
for frag in frag_list:
# Loop through fasta fragments that start with 5'
for frag in frag_list:
# Determine number of reads to create from this fragment
# This might not always provide an exact number of reads that were asked
# TODO resolve this issue
num_frag_reads = round((len(frag)/sum_frags) * num_reads)
# Determine number of reads to create from this fragment
# This might not always provide an exact number of reads that were asked
# TODO resolve this issue
num_frag_reads = round((len(frag)/sum_frags) * num_reads)
for i in range(0, num_frag_reads):
for i in range(0, num_frag_reads):
# Obtain random first position for the read on the fragment
rand_start = randrange(0, len(frag))
# Obtain random first position for the read on the fragment
rand_start = randrange(0, len(frag))
# Calculate the difference of start position and length of read
diff_start_end = len(frag)-rand_start
# Calculate the difference of start position and length of read
diff_start_end = len(frag)-rand_start
# If length of read is greater than difference of start to end, then add random nucleotides
if diff_start_end < read_len:
# If length of read is greater than difference of start to end, then add random nucleotides
if diff_start_end < read_len:
# Calculate number of random nucleotides to add to the end of the read
diff = read_len - diff_start_end
# Calculate number of random nucleotides to add to the end of the read
diff = read_len - diff_start_end
# Select random nucleotides from list of possible
rand_samp = choices(nucleotides, k=diff)
# Select random nucleotides from list of possible
rand_samp = choices(nucleotides, k=diff)
# Add the random list to the read and save
tmp_read = frag[rand_start:len(frag)] + ''.join(rand_samp)
else:
# Save subset of fragment as read
tmp_read = frag[rand_start:(rand_start + read_len)]
# Add the random list to the read and save
tmp_read = frag[rand_start:len(frag)] + ''.join(rand_samp)
else:
# Save subset of fragment as read
tmp_read = frag[rand_start:(rand_start + read_len)]
# append read to list
fasta_list.append(tmp_read)
# append read to list
fasta_list.append(tmp_read)
# Save list to file
np.savetxt(output_file_name,
......
......@@ -5,13 +5,12 @@ from src.read_sequencing import read_sequencing
def test_read_sequencing(tmpdir):
"""Tests the output, input file name and separator."""
"""Tests the correct number of reads were generated."""
read_sequencing(
frag_file_name='./tests/resources/test_terminal_fragments.txt',
num_reads=80,
read_len=10,
num_seq_cyc=5,
output_file_name=tmpdir / 'reads.txt'
)
df_out = pd.read_table(tmpdir / 'reads.txt', header=None)
assert df_out.shape[0] == 80 * 5
assert df_out.shape[0] == 80
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment