Skip to content
Snippets Groups Projects
Commit 5001fde3 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

feat: added random sequence generator to cli

parent d712ba97
No related branches found
No related tags found
1 merge request: !21 feat: added random sequence generator to cli
This commit is part of merge request !21. Comments created here will be created in the context of that merge request.
import argparse
from modules import read_sequencer as rs
from modules import ReadSequencer
import logging
parser = argparse.ArgumentParser(prog='read_sequencer',
......@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path',
parser.add_argument('--read_length',
help='read length for sequencing',
type=int)
parser.add_argument('--random', action='store_true', default=False,
help='generate random sequences')
parser.add_argument('--n_random', default=100, type=int, help='n random sequences')
parser.add_argument('--mean_random', default=50, type=int, help='mean random sequences')
parser.add_argument('--sd_random', default=25, type=int, help='standard deviation random sequences')
args = parser.parse_args()
def main():
LOG.info("Program started.")
read_sequencer = rs()
read_sequencer.read_fasta(args.input_file_path)
LOG.info("Read sequencer started.")
read_sequencer = ReadSequencer()
if args.random:
read_sequencer.add_random_sequences(n=args.n_random, mean=args.mean_random, sd=args.sd_random)
else:
read_sequencer.read_fasta(args.input_file_path)
read_sequencer.run_sequencing(args.read_length)
read_sequencer.write_fasta(args.output_file_path)
LOG.info("Program finished.")
LOG.info("Read sequencer finished.")
if __name__ == '__main__':
logging.basicConfig(
format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
level=logging.INFO,
)
level=logging.INFO)
LOG = logging.getLogger(__name__)
main()
This diff is collapsed.
import logging
LOG = logging.getLogger(__name__)


def generate_sequences(n, mean, sd):
    """Generate random nucleotide sequences with Gaussian-distributed lengths.

    Args:
        n (int): Number of sequences to generate.
        mean (int): Mean sequence length (Gaussian distribution).
        sd (float): Standard deviation of the sequence length
            (Gaussian distribution).

    Returns:
        dict: Maps each integer index ``0 .. n-1`` to a random string made
        of the characters A, T, C and G.
    """
    from random import choice, gauss

    LOG.info("Generating sequences.")
    nucleotides = ("A", "T", "C", "G")
    sequences = {}
    for index in range(n):
        # gauss() may yield a negative number; abs() keeps the length valid.
        length = abs(round(gauss(mean, sd)))
        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
    return sequences
def read_in_fasta(file_path):
'''
"""
This function reads in FASTA files.
Args:
......@@ -36,7 +11,7 @@ def read_in_fasta(file_path):
Returns:
Dict: It returns a dictionary with sequences.
'''
"""
LOG.info("Reading in FASTA files from destination.")
sequences = {}
f = open(file_path)
......@@ -52,7 +27,7 @@ def read_in_fasta(file_path):
return sequences
def read_sequence(seq, read_length):
'''
"""
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
smaller then the requested length or cuts the sequence if its longer.
......@@ -63,12 +38,11 @@ def read_sequence(seq, read_length):
Returns:
str: returns sequenced element
'''
"""
from random import choice
bases = ["A", "T", "C", "G"]
sequenced = ''
if read_length >= len(seq):
if read_length > len(seq):
for nt in range(len(seq)):
sequenced += seq[nt]
for nt in range(len(seq), read_length):
......@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length):
results = {}
for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key], read_length=read_length)
LOG.info("Sequencing was successfully executed.")
return results
import random
def generate_sequences(n, mean, sd):
"""
Generates random sequences.
......@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd):
Returns:
dict: of n sequences
"""
from random import choice, gauss
LOG.info("Generating random sequences.")
dict1 = {}
dict = {}
for i in range(n):
keys = range(n)
seq = ""
nt = ["A", "T", "C", "G"]
for value in range(round(random.gauss(mean, sd))):
seq = seq + random.choice(nt)
dict1[keys[i]] = seq
return dict1
for value in range(abs(round(gauss(mean, sd)))):
seq = seq + choice(nt)
key = str(i) + ': length ' + str(len(seq)) + ' nt'
dict[key] = seq
return dict
def write_fasta(sequences, file_path):
"""
......@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path):
outfile.write(key + "\n")
outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n")
LOG.info("Sequencing was successfully executed.")
class read_sequencer:
class ReadSequencer:
def __init__(self):
self.sequences = {}
self.reads = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment