Skip to content
Snippets Groups Projects
Commit 5001fde3 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

feat: added random sequence generator to cli

parent d712ba97
No related branches found
No related tags found
1 merge request: !21 "feat: added random sequence generator to cli"
import argparse
from modules import read_sequencer as rs
from modules import ReadSequencer
import logging
parser = argparse.ArgumentParser(prog='read_sequencer',
......@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path',
parser.add_argument('--read_length',
help='read length for sequencing',
type=int)
parser.add_argument('--random', action='store_true', default=False,
help='generate random sequences')
parser.add_argument('--n_random', default=100, type=int, help='n random sequences')
parser.add_argument('--mean_random', default=50, type=int, help='mean random sequences')
parser.add_argument('--sd_random', default=25, type=int, help='standard deviation random sequences')
args = parser.parse_args()
def main():
    """
    Run the read sequencer workflow driven by the parsed command line.

    The sequences are either generated randomly (when ``args.random`` is set)
    or read from the FASTA file at ``args.input_file_path``. They are then
    sequenced with ``args.read_length`` and written to
    ``args.output_file_path``.

    Relies on the module-level ``args`` namespace and on the ``LOG`` logger
    that the ``__main__`` guard creates before calling this function.
    """
    LOG.info("Read sequencer started.")
    # Build the sequencer exactly once; a second, discarded instance would
    # only repeat work and read the input file needlessly.
    read_sequencer = ReadSequencer()
    if args.random:
        # Random mode needs no input file, so the FASTA reader is skipped.
        read_sequencer.add_random_sequences(n=args.n_random, mean=args.mean_random, sd=args.sd_random)
    else:
        read_sequencer.read_fasta(args.input_file_path)
    read_sequencer.run_sequencing(args.read_length)
    read_sequencer.write_fasta(args.output_file_path)
    LOG.info("Read sequencer finished.")
if __name__ == '__main__':
    # Configure root logging once, before any module emits a record.
    logging.basicConfig(
        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
        level=logging.INFO,
    )
    # main() looks this logger up as a module-level global.
    LOG = logging.getLogger(__name__)
    main()
This diff is collapsed.
import logging
LOG = logging.getLogger(__name__)
def generate_sequences(n, mean, sd):
    """
    Generates random nucleotide sequences.

    The length of every sequence is drawn from a gaussian distribution and
    rounded to the nearest integer. The absolute value is taken so that a
    negative draw still yields a valid, non-negative length.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index in ``range(n)`` to its generated sequence,
            a string over the letters A, T, C and G.
    """
    from random import gauss, choice

    LOG.info("Generating sequences.")
    nucleotides = ("A", "T", "C", "G")
    sequences = {}
    for index in range(n):
        # One gauss() draw per sequence, then one choice() per nucleotide.
        length = abs(round(gauss(mean, sd)))
        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
    return sequences
def read_in_fasta(file_path):
'''
"""
This function reads in FASTA files.
Args:
......@@ -36,7 +11,7 @@ def read_in_fasta(file_path):
Returns:
Dict: It returns a dictionary with sequences.
'''
"""
LOG.info("Reading in FASTA files from destination.")
sequences = {}
f = open(file_path)
......@@ -52,7 +27,7 @@ def read_in_fasta(file_path):
return sequences
def read_sequence(seq, read_length):
'''
"""
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
smaller then the requested length or cuts the sequence if its longer.
......@@ -63,12 +38,11 @@ def read_sequence(seq, read_length):
Returns:
str: returns sequenced element
'''
"""
from random import choice
bases = ["A", "T", "C", "G"]
sequenced = ''
if read_length >= len(seq):
if read_length > len(seq):
for nt in range(len(seq)):
sequenced += seq[nt]
for nt in range(len(seq), read_length):
......@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length):
results = {}
for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key], read_length=read_length)
LOG.info("Sequencing was successfully executed.")
return results
import random
def generate_sequences(n, mean, sd):
"""
Generates random sequences.
......@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd):
Returns:
dict: of n sequences
"""
from random import choice, gauss
LOG.info("Generating random sequences.")
dict1 = {}
dict = {}
for i in range(n):
keys = range(n)
seq = ""
nt = ["A", "T", "C", "G"]
for value in range(round(random.gauss(mean, sd))):
seq = seq + random.choice(nt)
dict1[keys[i]] = seq
return dict1
for value in range(abs(round(gauss(mean, sd)))):
seq = seq + choice(nt)
key = str(i) + ': length ' + str(len(seq)) + ' nt'
dict[key] = seq
return dict
def write_fasta(sequences, file_path):
"""
......@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path):
outfile.write(key + "\n")
outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n")
LOG.info("Sequencing was successfully executed.")
class read_sequencer:
class ReadSequencer:
def __init__(self):
self.sequences = {}
self.reads = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment