remove: num_seq_cyc parameter

e80482a7 · Kathleen Moriarty · 8ce34674 · e80482a7 · e80482a7
Commit e80482a7 authored 3 years ago by Kathleen Moriarty
--- a/src/read_sequencing.py
+++ b/src/read_sequencing.py
@@ -10,7 +10,6 @@ def read_sequencing(
        output_file_name,
        num_reads,
        read_len,
-        num_seq_cyc,
 ) -> None:
    """Reads a fasta-formatted file of terminal fragments and simulates reads.
@@ -29,7 +28,6 @@ def read_sequencing(
        output_file_name (string): file name where to store the output
        num_reads: number of total reads to simulate
        read_len: integer of identical read length
-        num_seq_cyc: integer of number of cycles
    """
    # Import classes
    from random import choices, randrange
@@ -38,7 +36,6 @@ def read_sequencing(
    # Read data from terminal fragment file
    # Store fragments in a list
    f = open(frag_file_name, "r")
    frag_line = f.readline()
    frag_list = []  # type: List[str]
@@ -68,42 +65,39 @@ def read_sequencing(
    # Calculate sum of all lengths to determine the relative abundance for that fragment
    sum_frags = sum(map(len, frag_list))
-    # Repeat the read process for given number of cycles
+    # Loop through fasta fragments that start with 5'
-    for j in range(0, num_seq_cyc):
+    for frag in frag_list:
-        # Loop through fasta fragments that start with 5'
-        for frag in frag_list:
-            # Determine number of reads to create from this fragment
+        # Determine number of reads to create from this fragment
-            # This might not always provide an exact number of reads that were asked
+        # This might not always provide an exact number of reads that were asked
-            # TODO resolve this issue
+        # TODO resolve this issue
-            num_frag_reads = round((len(frag)/sum_frags) * num_reads)
+        num_frag_reads = round((len(frag)/sum_frags) * num_reads)
-            for i in range(0, num_frag_reads):
+        for i in range(0, num_frag_reads):
-                # Obtain random first position for the read on the fragment
+            # Obtain random first position for the read on the fragment
-                rand_start = randrange(0, len(frag))
+            rand_start = randrange(0, len(frag))
-                # Calculate the difference of start position and length of read
+            # Calculate the difference of start position and length of read
-                diff_start_end = len(frag)-rand_start
+            diff_start_end = len(frag)-rand_start
-                # If length of read is greater than difference of start to end, then add random nucleotides
+            # If length of read is greater than difference of start to end, then add random nucleotides
-                if diff_start_end < read_len:
+            if diff_start_end < read_len:
-                    # Calculate number of random nucleotides to add to the end of the read
+                # Calculate number of random nucleotides to add to the end of the read
-                    diff = read_len - diff_start_end
+                diff = read_len - diff_start_end
-                    # Select random nucleotides from list of possible
+                # Select random nucleotides from list of possible
-                    rand_samp = choices(nucleotides, k=diff)
+                rand_samp = choices(nucleotides, k=diff)
-                    # Add the random list to the read and save
+                # Add the random list to the read and save
-                    tmp_read = frag[rand_start:len(frag)] + ''.join(rand_samp)
+                tmp_read = frag[rand_start:len(frag)] + ''.join(rand_samp)
-                else:
+            else:
-                    # Save subset of fragment as read
+                # Save subset of fragment as read
-                    tmp_read = frag[rand_start:(rand_start + read_len)]
+                tmp_read = frag[rand_start:(rand_start + read_len)]
-                # append read to list
+            # append read to list
-                fasta_list.append(tmp_read)
+            fasta_list.append(tmp_read)
    # Save list to file
    np.savetxt(output_file_name,

--- a/tests/test_read_sequence.py
+++ b/tests/test_read_sequence.py
@@ -5,13 +5,12 @@ from src.read_sequencing import read_sequencing
 def test_read_sequencing(tmpdir):
-    """Tests the output, input file name and separator."""
+    """Tests the correct number of reads were generated."""
    read_sequencing(
        frag_file_name='./tests/resources/test_terminal_fragments.txt',
        num_reads=80,
        read_len=10,
-        num_seq_cyc=5,
        output_file_name=tmpdir / 'reads.txt'
    )
    df_out = pd.read_table(tmpdir / 'reads.txt', header=None)
-    assert df_out.shape[0] == 80 * 5
+    assert df_out.shape[0] == 80