From 59e66531d2c7e6d9a5b7cf77db4c0a20ea218208 Mon Sep 17 00:00:00 2001
From: "kathleen.moriarty" <kathleen.moriarty@unibas.ch>
Date: Mon, 3 Jan 2022 14:29:24 +0100
Subject: [PATCH] remove old read_sequencing.py

---
 src/read_sequencing.py | 98 ------------------------------------------
 1 file changed, 98 deletions(-)
 delete mode 100644 src/read_sequencing.py

diff --git a/src/read_sequencing.py b/src/read_sequencing.py
deleted file mode 100644
index dfd7da9..0000000
--- a/src/read_sequencing.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""Read Sequencing.
-
-Simulate the sequencing of reads on the template of terminal fragments and simulates reads of these fragments.
-Author: Kathleen Moriarty
-"""
-# Imports from built-in modules
-from random import choices
-from typing import List
-from pathlib import Path
-
-
-def read_sequencing(
-    frag_file_name: Path,
-    output_file_name: Path = Path.cwd() / 'output_reads.txt',
-    num_reads: int = 1000,
-    read_len: int = 80,
-) -> None:
-    """Reads a fasta-formatted file of terminal fragments and simulates reads.
-
-    Simulate the sequencing of reads on the template of terminal
-    fragments. Reads are copies of fixed length starting
-    from the 5' end of fragments. If the desired read length
-    is larger than the fragment length, sequencing would in
-    principle proceed into the 3' adaptor and then would perhaps
-    yield random bases. For simplicity, here we assume that random
-    nucleotides are introduced in this case. Saves a fasta-formatted
-    file of reads of identical length, representing 5’
-    ends of the terminal fragments as .txt.
-
-    Args:
-        frag_file_name: input file path of terminal fragments
-        output_file_name: output file path where to store the output
-        num_reads: number of total reads to simulate
-        read_len: integer of identical read length
-    """
-    # Read data from terminal fragment file
-    # Store fragment descriptions in a list
-    frag_desc = []  # type: List[str]
-
-    with open(frag_file_name, 'r') as f:
-        frag_line = f.readline()
-        # Store all fragments as a list to parse later
-        frag_list = []  # type: List[str]
-        # Store combined fragment lines
-        frag_str = ""
-        while frag_line != "":
-            # To stop when the end of file is reached
-            if frag_line.startswith('>'):
-                # Determine if this is the first fragment in the file
-                # Ignore the description line (starting with >) of the first fragment
-                if not (len(frag_list) == 0 and frag_str == ""):
-                    # Not the first fragment. Append to list.
-                    frag_list.append(frag_str)
-                frag_str = ""
-                # Store description line for output file
-                frag_desc.append(frag_line)
-            else:
-                frag_str = frag_str + frag_line.rstrip("\n")
-            # Read next line
-            frag_line = f.readline()
-        frag_list.append(frag_str)
-
-    # Store list of random nucleotides from which to sample when read length is too short
-    nucleotides = ['A', 'C', 'G', 'T']
-
-    # Calculate sum of all lengths to determine the relative abundance for that fragment
-    sum_frags = sum(map(len, frag_list))
-
-    # Open the file to save the reads
-    with open(output_file_name, 'w') as fw:
-
-        # Loop through fasta fragments that start with 5'
-        for frag in frag_list:
-            # Determine number of reads to create from this fragment
-            # This might not always provide an exact number of reads that were asked
-            # TODO resolve this issue
-            num_frag_reads = round((len(frag)/sum_frags) * num_reads)
-
-            for i in range(0, num_frag_reads):
-                # If the read length is less than the required length given by the parameter,
-                # then add random nucleotides
-                if len(frag) < read_len:
-
-                    # Calculate number of random nucleotides to add to the end of the read
-                    diff = read_len - len(frag)
-
-                    # Select random nucleotides from list of possible
-                    rand_samp = choices(nucleotides, k=diff)
-
-                    # Add the random list to the read and save
-                    tmp_read = frag[0:len(frag)] + ''.join(rand_samp)
-                else:
-                    # Save subset of fragment as read
-                    tmp_read = frag[0:read_len]
-
-                # Write read to file and original fragment description
-                fw.write(frag_desc[frag_list.index(frag)])
-                fw.write(tmp_read + "\n\n")
-- 
GitLab