Skip to content
Snippets Groups Projects
Commit c60004ba authored by Christoph Harmel
Browse files

Merge branch 'fix_logging' into 'main'

feat: added random sequence generator to cli

See merge request !21
parents d712ba97 5001fde3
No related branches found
No related tags found
1 merge request: !21 feat: added random sequence generator to cli
import argparse import argparse
from modules import read_sequencer as rs from modules import ReadSequencer
import logging import logging
parser = argparse.ArgumentParser(prog='read_sequencer', parser = argparse.ArgumentParser(prog='read_sequencer',
...@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path', ...@@ -11,21 +11,28 @@ parser.add_argument('--output_file_path',
parser.add_argument('--read_length', parser.add_argument('--read_length',
help='read length for sequencing', help='read length for sequencing',
type=int) type=int)
parser.add_argument('--random', action='store_true', default=False,
help='generate random sequences')
parser.add_argument('--n_random', default=100, type=int, help='n random sequences')
parser.add_argument('--mean_random', default=50, type=int, help='mean random sequences')
parser.add_argument('--sd_random', default=25, type=int, help='standard deviation random sequences')
args = parser.parse_args() args = parser.parse_args()
def main(): def main():
LOG.info("Program started.") LOG.info("Read sequencer started.")
read_sequencer = rs() read_sequencer = ReadSequencer()
read_sequencer.read_fasta(args.input_file_path) if args.random:
read_sequencer.add_random_sequences(n=args.n_random, mean=args.mean_random, sd=args.sd_random)
else:
read_sequencer.read_fasta(args.input_file_path)
read_sequencer.run_sequencing(args.read_length) read_sequencer.run_sequencing(args.read_length)
read_sequencer.write_fasta(args.output_file_path) read_sequencer.write_fasta(args.output_file_path)
LOG.info("Program finished.") LOG.info("Read sequencer finished.")
if __name__ == '__main__': if __name__ == '__main__':
logging.basicConfig( logging.basicConfig(
format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
level=logging.INFO, level=logging.INFO)
)
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
main() main()
This diff is collapsed.
import logging import logging
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
def generate_sequences(n, mean, sd):
    """
    Generates random nucleotide sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: n sequences, keyed by their integer index (0 to n - 1).
    """
    from random import gauss, choice

    LOG.info("Generating sequences.")
    nucleotides = ["A", "T", "C", "G"]
    sequences = {}
    for index in range(n):
        # gauss() may produce a negative draw; abs() turns it into a usable
        # length instead of silently yielding an empty range.
        length = abs(round(gauss(mean, sd)))
        # One choice() call per nucleotide, in order, joined once at the end.
        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
    return sequences
def read_in_fasta(file_path): def read_in_fasta(file_path):
''' """
This function reads in FASTA files. This function reads in FASTA files.
Args: Args:
...@@ -36,7 +11,7 @@ def read_in_fasta(file_path): ...@@ -36,7 +11,7 @@ def read_in_fasta(file_path):
Returns: Returns:
Dict: It returns a dictionary with sequences. Dict: It returns a dictionary with sequences.
''' """
LOG.info("Reading in FASTA files from destination.") LOG.info("Reading in FASTA files from destination.")
sequences = {} sequences = {}
f = open(file_path) f = open(file_path)
...@@ -52,7 +27,7 @@ def read_in_fasta(file_path): ...@@ -52,7 +27,7 @@ def read_in_fasta(file_path):
return sequences return sequences
def read_sequence(seq, read_length): def read_sequence(seq, read_length):
''' """
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
smaller then the requested length or cuts the sequence if its longer. smaller then the requested length or cuts the sequence if its longer.
...@@ -63,12 +38,11 @@ def read_sequence(seq, read_length): ...@@ -63,12 +38,11 @@ def read_sequence(seq, read_length):
Returns: Returns:
str: returns sequenced element str: returns sequenced element
''' """
from random import choice from random import choice
bases = ["A", "T", "C", "G"] bases = ["A", "T", "C", "G"]
sequenced = '' sequenced = ''
if read_length >= len(seq): if read_length > len(seq):
for nt in range(len(seq)): for nt in range(len(seq)):
sequenced += seq[nt] sequenced += seq[nt]
for nt in range(len(seq), read_length): for nt in range(len(seq), read_length):
...@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length): ...@@ -94,10 +68,10 @@ def simulate_sequencing(sequences, read_length):
results = {} results = {}
for index, key in enumerate(sequences): for index, key in enumerate(sequences):
results[key] = read_sequence(sequences[key], read_length=read_length) results[key] = read_sequence(sequences[key], read_length=read_length)
LOG.info("Sequencing was successfully executed.")
return results return results
import random
def generate_sequences(n, mean, sd): def generate_sequences(n, mean, sd):
""" """
Generates random sequences. Generates random sequences.
...@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd): ...@@ -110,16 +84,17 @@ def generate_sequences(n, mean, sd):
Returns: Returns:
dict: of n sequences dict: of n sequences
""" """
from random import choice, gauss
LOG.info("Generating random sequences.") LOG.info("Generating random sequences.")
dict1 = {} dict = {}
for i in range(n): for i in range(n):
keys = range(n)
seq = "" seq = ""
nt = ["A", "T", "C", "G"] nt = ["A", "T", "C", "G"]
for value in range(round(random.gauss(mean, sd))): for value in range(abs(round(gauss(mean, sd)))):
seq = seq + random.choice(nt) seq = seq + choice(nt)
dict1[keys[i]] = seq key = str(i) + ': length ' + str(len(seq)) + ' nt'
return dict1 dict[key] = seq
return dict
def write_fasta(sequences, file_path): def write_fasta(sequences, file_path):
""" """
...@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path): ...@@ -138,8 +113,8 @@ def write_fasta(sequences, file_path):
outfile.write(key + "\n") outfile.write(key + "\n")
outfile.write("\n".join(wrap(value, 60))) outfile.write("\n".join(wrap(value, 60)))
outfile.write("\n") outfile.write("\n")
LOG.info("Sequencing was successfully executed.")
class read_sequencer: class ReadSequencer:
def __init__(self): def __init__(self):
self.sequences = {} self.sequences = {}
self.reads = {} self.reads = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment