Skip to content
Snippets Groups Projects
Commit d712ba97 authored by Michael Sandholzer
Browse files

Merge branch 'logger' into 'main'

Logger

See merge request !20
parents ec818813 10fb195e
No related branches found
No related tags found
1 merge request!20Logger
import argparse
from modules import read_sequencer as rs
# Command-line interface definition. Building the parser has no side effects;
# the actual parsing happens inside main().
parser = argparse.ArgumentParser(
    prog='read_sequencer',
    description='Simulates sequencing of DNA sequences specified by an FASTA file.')
# All three options are required: without them the pipeline below would fail
# with a TypeError (open(None) / comparing None with an int) instead of a
# readable usage message.
parser.add_argument('--input_file_path',
                    help='path to FASTA file',
                    required=True)
parser.add_argument('--output_file_path',
                    help='path to FASTA file',
                    required=True)
parser.add_argument('--read_length',
                    help='read length for sequencing',
                    type=int,
                    required=True)


def main(argv=None):
    """
    Reads a FASTA file, simulates sequencing and writes the reads to a file.

    Args:
        argv (list): Argument strings to parse. None (the default) lets
            argparse read them from sys.argv[1:].
    """
    # Parse here rather than at import time, so importing this module does not
    # consume (or exit on) the command line of the importing process.
    args = parser.parse_args(argv)
    read_sequencer = rs()
    read_sequencer.read_fasta(args.input_file_path)
    read_sequencer.run_sequencing(args.read_length)
    read_sequencer.write_fasta(args.output_file_path)


if __name__ == '__main__':
    main()
def generate_sequences(n, mean, sd):
    """
    Generates random sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index 0..n-1 to a random sequence made of A, T, C and G.
    """
    from random import gauss, choice

    nt = ["A", "T", "C", "G"]
    sequences = {}
    for i in range(n):
        # The drawn length is rounded to an integer; abs() turns a negative
        # draw into a positive length instead of an empty sequence.
        length = abs(round(gauss(mean, sd)))
        # One choice() call per nucleotide, joined once at the end.
        sequences[i] = "".join(choice(nt) for _ in range(length))
    return sequences
def read_in_fasta(file_path):
    """
    This function reads in FASTA files.

    Args:
        file_path (str): A file path directing to the fasta file.

    Returns:
        dict: maps each definition line (with every '>' removed) to the
            concatenation of the stripped sequence lines that follow it.
            A header that is not followed by any line is not included.

    Raises:
        ValueError: if sequence data appears before the first '>' header.
    """
    sequences = {}
    defline = None
    # The context manager closes the file even if parsing raises.
    with open(file_path) as fasta:
        for line in fasta:
            if line.startswith('>'):
                defline = line.strip().replace('>', '')
                continue
            chunk = line.strip()
            if defline is None:
                if chunk:
                    raise ValueError(
                        "Sequence data found before the first '>' header in "
                        + str(file_path))
                # Blank lines ahead of the first header carry no data.
                continue
            sequences[defline] = sequences.get(defline, '') + chunk
    return sequences
def read_sequence(seq, read_length):
    """
    This function reads a sequence of a specific read length and adds additional
    random nucleotides if the sequence is shorter than the requested length, or
    cuts the sequence if it is longer.

    Args:
        seq (str): the sequence to read
        read_length (int): length of reads

    Returns:
        str: the first read_length characters of seq, padded at the end with
            random nucleotides (A, T, C, G) when seq is shorter than read_length.
    """
    from random import choice

    bases = ["A", "T", "C", "G"]
    missing = read_length - len(seq)
    if missing >= 0:
        # Sequence fits entirely: keep it and pad with one random base per
        # missing position.
        return seq + "".join(choice(bases) for _ in range(missing))
    # max() makes a negative read_length give an empty read rather than a
    # slice counted from the end of the sequence.
    return seq[:max(read_length, 0)]
def simulate_sequencing(sequences, read_length):
    """
    Simulates sequencing.

    Every entry of the input is passed through read_sequence with the same
    read length; the keys are carried over unchanged.

    Args:
        sequences (dict): Dictionary of sequences to sequence.
        read_length (int): length of reads

    Returns:
        dict: one read per input key.
    """
    return {
        name: read_sequence(sequences[name], read_length=read_length)
        for name in sequences
    }
import random
def generate_sequences(n, mean, sd):
    """
    Generates random sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index 0..n-1 to a random sequence made of A, T, C and G.
            A drawn length that rounds to zero or below gives an empty sequence.
    """
    alphabet = ["A", "T", "C", "G"]
    generated = {}
    for index in range(n):
        # Draw the length first, then one nucleotide per position.
        length = round(random.gauss(mean, sd))
        generated[index] = "".join(random.choice(alphabet) for _ in range(length))
    return generated
def write_fasta(sequences, file_path):
    """
    Takes a dictionary and writes it to a fasta file.
    Must specify the filename when calling the function.

    Each entry is written as a '>' definition line followed by the sequence
    wrapped to lines of at most 60 characters.

    Args:
        sequences (dict): Dictionary of sequence. Keys are used as definition
            lines and may be any object convertible with str().
        file_path (str): A file path directing to the output file.
    """
    from textwrap import wrap

    with open(file_path, "w") as outfile:
        for key, value in sequences.items():
            # str() allows non-string keys (e.g. integer indices).
            header = str(key)
            # FASTA definition lines start with '>'; add it unless the key
            # already carries one.
            if not header.startswith(">"):
                header = ">" + header
            outfile.write(header + "\n")
            outfile.write("\n".join(wrap(value, 60)))
            outfile.write("\n")
class read_sequencer:
    """Keeps a set of input sequences and the reads simulated from them."""

    def __init__(self):
        """Starts with no sequences and no reads."""
        self.sequences = dict()
        self.reads = dict()

    def add_random_sequences(self, n, mean, sd):
        """Replaces the stored sequences with the result of generate_sequences(n, mean, sd)."""
        generated = generate_sequences(n, mean, sd)
        self.sequences = generated

    def read_fasta(self, input_file):
        """Replaces the stored sequences with those read from input_file by read_in_fasta."""
        loaded = read_in_fasta(input_file)
        self.sequences = loaded

    def run_sequencing(self, read_length):
        """Stores the result of simulate_sequencing on the stored sequences as the reads."""
        produced = simulate_sequencing(self.sequences, read_length)
        self.reads = produced

    def write_fasta(self, output_file_path):
        """Passes the stored reads and output_file_path to the module-level write_fasta."""
        write_fasta(self.reads, output_file_path)
File added
File added
......@@ -64,7 +64,7 @@ def read_sequence(seq, read_length):
str: returns sequenced element
'''
LOG.info("Reading sequences.")
from random import choice
bases = ["A", "T", "C", "G"]
sequenced = ''
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment