From 204584b0b48e63fc92e3f48a000a8e98e1b90cb7 Mon Sep 17 00:00:00 2001 From: clara <clara@dyn-39-19.mobile.unibas.ch> Date: Fri, 11 Nov 2022 15:52:38 +0100 Subject: [PATCH] modified: read_sequencer_package/modules.py --- read_sequencer_package/modules.py | 64 ++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/modules.py index ca84a64..a65561a 100644 --- a/read_sequencer_package/modules.py +++ b/read_sequencer_package/modules.py @@ -1,11 +1,13 @@ def read_in_fasta(file_path): ''' - This function reads in FASTA files + This function reads in FASTA files. - argument is file_path + Args: + file_path (str): A file path directing to the fasta file. - it returns a dictionary with the sequences + Returns: + Dict: It returns a dictionary with sequences. ''' sequences = {} @@ -21,13 +23,17 @@ def read_in_fasta(file_path): f.close() return sequences -def read_sequence(seq, read_length, padding_probabilities=None): +def read_sequence(seq, read_length): ''' - This function reads sequences - arguments: seq is a list of sequences - padding_probabilities is a number?? + This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is + smaller than the requested length or cuts the sequence if it is longer. - returns sequenced element + Args: + seq (str): the sequence to read + read_length (int): length of reads + + Returns: + str: returns sequenced element ''' from random import choice @@ -45,16 +51,54 @@ def read_sequence(seq, read_length, padding_probabilities=None): return sequenced def simulate_sequencing(sequences, read_length): + """ + Simulates sequencing. + + Args: + sequences (dict): Dictionary of sequences to sequence. 
+ read_length (int): length of reads + + Returns: + dict: of n sequences as values + """ results = {} for index, key in enumerate(sequences): results[key] = read_sequence(sequences[key],read_length=read_length) return results +import random +def generate_sequences(n, mean, sd): + """ + Generates random sequences. + + Args: + n (int): Number of sequences to generate. + mean (int): mean length of sequence (gaussian distribution). + sd (float): standard deviation of length of sequence (gaussian distribution). + + Returns: + dict: of n sequences + """ + dict1 = {} + for i in range(n): + keys = range(n) + seq = "" + nt = ["A", "T", "C", "G"] + for value in range(round(random.gauss(mean, sd))): + seq = seq + random.choice(nt) + dict1[keys[i]] = seq + return dict1 + def write_fasta(sequences, file_path): """ - Takes a dictionary and writes it to a fasta file - Must specify the filename when caling the function + Takes a dictionary and writes it to a fasta file. + Must specify the filename when calling the function. + + Args: + sequences (dict): Dictionary of sequences. + file_path (str): A file path directing to the output file. + """ from textwrap import wrap with open(file_path, "w") as outfile: -- GitLab