Skip to content
Snippets Groups Projects
Commit e80482a7 authored by Kathleen Moriarty
Browse files

remove: num_seq_cyc parameter

parent 8ce34674
No related branches found
No related tags found
1 merge request: !17 "Issue 7"
...@@ -10,7 +10,6 @@ def read_sequencing( ...@@ -10,7 +10,6 @@ def read_sequencing(
output_file_name, output_file_name,
num_reads, num_reads,
read_len, read_len,
num_seq_cyc,
) -> None: ) -> None:
"""Reads a fasta-formatted file of terminal fragments and simulates reads. """Reads a fasta-formatted file of terminal fragments and simulates reads.
...@@ -29,7 +28,6 @@ def read_sequencing( ...@@ -29,7 +28,6 @@ def read_sequencing(
output_file_name (string): file name where to store the output output_file_name (string): file name where to store the output
num_reads: number of total reads to simulate num_reads: number of total reads to simulate
read_len: integer of identical read length read_len: integer of identical read length
num_seq_cyc: integer of number of cycles
""" """
# Import classes # Import classes
from random import choices, randrange from random import choices, randrange
...@@ -38,7 +36,6 @@ def read_sequencing( ...@@ -38,7 +36,6 @@ def read_sequencing(
# Read data from terminal fragment file # Read data from terminal fragment file
# Store fragments in a list # Store fragments in a list
f = open(frag_file_name, "r") f = open(frag_file_name, "r")
frag_line = f.readline() frag_line = f.readline()
frag_list = [] # type: List[str] frag_list = [] # type: List[str]
...@@ -68,42 +65,39 @@ def read_sequencing( ...@@ -68,42 +65,39 @@ def read_sequencing(
# Calculate sum of all lengths to determine the relative abundance for that fragment # Calculate sum of all lengths to determine the relative abundance for that fragment
sum_frags = sum(map(len, frag_list)) sum_frags = sum(map(len, frag_list))
# Repeat the read process for given number of cycles # Loop through fasta fragments that start with 5'
for j in range(0, num_seq_cyc): for frag in frag_list:
# Loop through fasta fragments that start with 5'
for frag in frag_list:
# Determine number of reads to create from this fragment # Determine number of reads to create from this fragment
# This might not always provide an exact number of reads that were asked # This might not always provide an exact number of reads that were asked
# TODO resolve this issue # TODO resolve this issue
num_frag_reads = round((len(frag)/sum_frags) * num_reads) num_frag_reads = round((len(frag)/sum_frags) * num_reads)
for i in range(0, num_frag_reads): for i in range(0, num_frag_reads):
# Obtain random first position for the read on the fragment # Obtain random first position for the read on the fragment
rand_start = randrange(0, len(frag)) rand_start = randrange(0, len(frag))
# Calculate the difference of start position and length of read # Calculate the difference of start position and length of read
diff_start_end = len(frag)-rand_start diff_start_end = len(frag)-rand_start
# If length of read is greater than difference of start to end, then add random nucleotides # If length of read is greater than difference of start to end, then add random nucleotides
if diff_start_end < read_len: if diff_start_end < read_len:
# Calculate number of random nucleotides to add to the end of the read # Calculate number of random nucleotides to add to the end of the read
diff = read_len - diff_start_end diff = read_len - diff_start_end
# Select random nucleotides from list of possible # Select random nucleotides from list of possible
rand_samp = choices(nucleotides, k=diff) rand_samp = choices(nucleotides, k=diff)
# Add the random list to the read and save # Add the random list to the read and save
tmp_read = frag[rand_start:len(frag)] + ''.join(rand_samp) tmp_read = frag[rand_start:len(frag)] + ''.join(rand_samp)
else: else:
# Save subset of fragment as read # Save subset of fragment as read
tmp_read = frag[rand_start:(rand_start + read_len)] tmp_read = frag[rand_start:(rand_start + read_len)]
# append read to list # append read to list
fasta_list.append(tmp_read) fasta_list.append(tmp_read)
# Save list to file # Save list to file
np.savetxt(output_file_name, np.savetxt(output_file_name,
......
...@@ -5,13 +5,12 @@ from src.read_sequencing import read_sequencing ...@@ -5,13 +5,12 @@ from src.read_sequencing import read_sequencing
def test_read_sequencing(tmpdir): def test_read_sequencing(tmpdir):
"""Tests the output, input file name and separator.""" """Tests the correct number of reads were generated."""
read_sequencing( read_sequencing(
frag_file_name='./tests/resources/test_terminal_fragments.txt', frag_file_name='./tests/resources/test_terminal_fragments.txt',
num_reads=80, num_reads=80,
read_len=10, read_len=10,
num_seq_cyc=5,
output_file_name=tmpdir / 'reads.txt' output_file_name=tmpdir / 'reads.txt'
) )
df_out = pd.read_table(tmpdir / 'reads.txt', header=None) df_out = pd.read_table(tmpdir / 'reads.txt', header=None)
assert df_out.shape[0] == 80 * 5 assert df_out.shape[0] == 80
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment