Skip to content
Snippets Groups Projects
Commit 12ef1d8e authored by Kathleen Moriarty's avatar Kathleen Moriarty
Browse files

add: description line for reads

parent b9d94593
No related branches found
No related tags found
1 merge request!17Issue 7
...@@ -30,71 +30,72 @@ def read_sequencing( ...@@ -30,71 +30,72 @@ def read_sequencing(
read_len: integer of identical read length read_len: integer of identical read length
""" """
# Import classes # Import classes
from random import choices, randrange from random import choices
import numpy as np
from typing import List from typing import List
# Read data from terminal fragment file # Read data from terminal fragment file
# Store fragments in a list # Store fragment descriptions in a list
f = open(frag_file_name, "r") frag_desc = [] # type: List[str]
frag_line = f.readline()
frag_list = [] # type: List[str]
frag_str = ""
while frag_line != "":
# To stop when the end of file is reached
if frag_line.startswith('>'):
# Determine if this is the first fragment in the file
# Ignore the description line (starting with >) of the first fragment
if not (len(frag_list) == 0 and frag_str == ""):
# Not the first fragment. Append to list.
frag_list.append(frag_str)
frag_str = ""
else:
frag_str = frag_str + frag_line.rstrip("\n")
frag_line = f.readline()
frag_list.append(frag_str)
# print(frag_list)
f.close()
# Initiate list to store reads from modified fragments with open(frag_file_name, 'r') as f:
fasta_list = list() frag_line = f.readline()
frag_list = [] # type: List[str]
frag_str = ""
while frag_line != "":
# To stop when the end of file is reached
if frag_line.startswith('>'):
# Determine if this is the first fragment in the file
# Ignore the description line (starting with >) of the first fragment
if not (len(frag_list) == 0 and frag_str == ""):
# Not the first fragment. Append to list.
frag_list.append(frag_str)
frag_str = ""
# Store description line for output file
frag_desc.append(frag_line)
else:
frag_str = frag_str + frag_line.rstrip("\n")
# Read next line
frag_line = f.readline()
frag_list.append(frag_str)
# store list of random nucleotides from which to sample when read length is too short # Store list of random nucleotides from which to sample when read length is too short
nucleotides = ['A', 'C', 'G', 'T'] nucleotides = ['A', 'C', 'G', 'T']
# Calculate sum of all lengths to determine the relative abundance for that fragment # Calculate sum of all lengths to determine the relative abundance for that fragment
sum_frags = sum(map(len, frag_list)) sum_frags = sum(map(len, frag_list))
# Loop through fasta fragments that start with 5' # Open the file to save the reads
for frag in frag_list: with open(output_file_name, 'w') as fw:
# Determine number of reads to create from this fragment # Loop through fasta fragments that start with 5'
# This might not always provide an exact number of reads that were asked for frag in frag_list:
# TODO resolve this issue # Determine number of reads to create from this fragment
num_frag_reads = round((len(frag)/sum_frags) * num_reads) # This might not always provide an exact number of reads that were asked
# TODO resolve this issue
for i in range(0, num_frag_reads): num_frag_reads = round((len(frag)/sum_frags) * num_reads)
# If the read length is less than the required length given by the parameter, then add random nucleotides for i in range(0, num_frag_reads):
if len(frag) < read_len: # If the read length is less than the required length given by the parameter,
# then add random nucleotides
# Calculate number of random nucleotides to add to the end of the read if len(frag) < read_len:
diff = read_len - read_len
# Calculate number of random nucleotides to add to the end of the read
# Select random nucleotides from list of possible diff = read_len - read_len
rand_samp = choices(nucleotides, k=diff)
# Select random nucleotides from list of possible
# Add the random list to the read and save rand_samp = choices(nucleotides, k=diff)
tmp_read = frag[0:(len(frag)-1)] + ''.join(rand_samp)
else: # Add the random list to the read and save
# Save subset of fragment as read tmp_read = frag[0:len(frag)] + ''.join(rand_samp)
tmp_read = frag[0:(read_len-1)] else:
# Save subset of fragment as read
# append read to list tmp_read = frag[0:read_len]
fasta_list.append(tmp_read)
# Write read to file and original fragment description
# Save list to file fw.write(frag_desc[frag_list.index(frag)])
np.savetxt(output_file_name, fw.write(tmp_read + "\n")
fasta_list,
delimiter=",", read_sequencing(frag_file_name = "../tests/resources/test_terminal_fragments.txt",
fmt='%s') output_file_name = "reads.txt",
num_reads=90,
read_len=10)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment