Skip to content
Snippets Groups Projects
Commit 1bed9aa4 authored by Kathleen Moriarty's avatar Kathleen Moriarty Committed by Kathleen Moriarty
Browse files

add: description line for reads

parent 5d15d53f
Branches
No related tags found
1 merge request!17Issue 7
......@@ -30,71 +30,72 @@ def read_sequencing(
read_len: integer of identical read length
"""
# Import classes
from random import choices, randrange
import numpy as np
from random import choices
from typing import List
# Read data from terminal fragment file
# Store fragments in a list
f = open(frag_file_name, "r")
frag_line = f.readline()
frag_list = [] # type: List[str]
frag_str = ""
while frag_line != "":
# To stop when the end of file is reached
if frag_line.startswith('>'):
# Determine if this is the first fragment in the file
# Ignore the description line (starting with >) of the first fragment
if not (len(frag_list) == 0 and frag_str == ""):
# Not the first fragment. Append to list.
frag_list.append(frag_str)
frag_str = ""
else:
frag_str = frag_str + frag_line.rstrip("\n")
frag_line = f.readline()
frag_list.append(frag_str)
# print(frag_list)
f.close()
# Store fragment descriptions in a list
frag_desc = [] # type: List[str]
# Initiate list to store reads from modified fragments
fasta_list = list()
with open(frag_file_name, 'r') as f:
frag_line = f.readline()
frag_list = [] # type: List[str]
frag_str = ""
while frag_line != "":
# To stop when the end of file is reached
if frag_line.startswith('>'):
# Determine if this is the first fragment in the file
# Ignore the description line (starting with >) of the first fragment
if not (len(frag_list) == 0 and frag_str == ""):
# Not the first fragment. Append to list.
frag_list.append(frag_str)
frag_str = ""
# Store description line for output file
frag_desc.append(frag_line)
else:
frag_str = frag_str + frag_line.rstrip("\n")
# Read next line
frag_line = f.readline()
frag_list.append(frag_str)
# store list of random nucleotides from which to sample when read length is too short
# Store list of random nucleotides from which to sample when read length is too short
nucleotides = ['A', 'C', 'G', 'T']
# Calculate sum of all lengths to determine the relative abundance for that fragment
sum_frags = sum(map(len, frag_list))
# Loop through fasta fragments that start with 5'
for frag in frag_list:
# Determine number of reads to create from this fragment
# This might not always provide an exact number of reads that were asked
# TODO resolve this issue
num_frag_reads = round((len(frag)/sum_frags) * num_reads)
for i in range(0, num_frag_reads):
# If the read length is less than the required length given by the parameter, then add random nucleotides
if len(frag) < read_len:
# Calculate number of random nucleotides to add to the end of the read
diff = read_len - read_len
# Select random nucleotides from list of possible
rand_samp = choices(nucleotides, k=diff)
# Add the random list to the read and save
tmp_read = frag[0:(len(frag)-1)] + ''.join(rand_samp)
else:
# Save subset of fragment as read
tmp_read = frag[0:(read_len-1)]
# append read to list
fasta_list.append(tmp_read)
# Save list to file
np.savetxt(output_file_name,
fasta_list,
delimiter=",",
fmt='%s')
# Open the file to save the reads
with open(output_file_name, 'w') as fw:
# Loop through fasta fragments that start with 5'
for frag in frag_list:
# Determine number of reads to create from this fragment
# This might not always provide an exact number of reads that were asked
# TODO resolve this issue
num_frag_reads = round((len(frag)/sum_frags) * num_reads)
for i in range(0, num_frag_reads):
# If the read length is less than the required length given by the parameter,
# then add random nucleotides
if len(frag) < read_len:
# Calculate number of random nucleotides to add to the end of the read
diff = read_len - read_len
# Select random nucleotides from list of possible
rand_samp = choices(nucleotides, k=diff)
# Add the random list to the read and save
tmp_read = frag[0:len(frag)] + ''.join(rand_samp)
else:
# Save subset of fragment as read
tmp_read = frag[0:read_len]
# Write read to file and original fragment description
fw.write(frag_desc[frag_list.index(frag)])
fw.write(tmp_read + "\n")
read_sequencing(frag_file_name = "../tests/resources/test_terminal_fragments.txt",
output_file_name = "reads.txt",
num_reads=90,
read_len=10)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment