Clara Serger · 204584b0
--- a/read_sequencer_package/modules.py

+ 54

− 10
+++ b/read_sequencer_package/modules.py

+ 54

− 10

 def read_in_fasta(file_path):
    '''
-    This function reads in FASTA files
+    This function reads in FASTA files.

-    argument is file_path
+    Args:
+        file_path (str): Path to the FASTA file.

-    it returns a dictionary with the sequences
+    Returns:
+        Dict: It returns a dictionary with sequences.

    '''
    sequences = {}
 @@ -21,13 +23,17 @@ def read_in_fasta(file_path):
    f.close()
    return sequences

-def read_sequence(seq, read_length, padding_probabilities=None):
+def read_sequence(seq, read_length):
    '''
-    This function reads sequences
-    arguments: seq is a list of sequences
-    padding_probabilities is a number??
+    This function reads a sequence at a specific read length: it adds additional nucleotides if the sequence is
+    shorter than the requested length, or cuts the sequence if it is longer.

-    returns sequenced element
+    Args:
+        seq (str): the sequence to read 
+        read_length (int): length of reads
+
+    Returns:
+        str: returns sequenced element

    '''
    from random import choice
 @@ -45,16 +51,54 @@ def read_sequence(seq, read_length, padding_probabilities=None):
    return sequenced

 def simulate_sequencing(sequences, read_length):
+    """
+    Simulates sequencing.
+
+    Args:
+        sequences (dict): Dictionary of sequences to sequence.
+        read_length (int): length of reads
+
+    Returns:
+        dict: of n sequences as values 
+    """
    results = {}
    for index, key in enumerate(sequences):
        results[key] = read_sequence(sequences[key],read_length=read_length)

    return results

+import random
def generate_sequences(n, mean, sd):
    """
    Generates random nucleotide sequences.

    The length of each sequence is drawn from a gaussian distribution and
    rounded to the nearest integer; a draw of zero or less yields an empty
    sequence.

    Args:
        n (int): Number of sequences to generate.
        mean (float): Mean length of a sequence (gaussian distribution).
        sd (float): Standard deviation of the sequence length (gaussian distribution).

    Returns:
        dict: Maps the integer keys 0..n-1 to the generated sequence strings.
    """
    nucleotides = "ATCG"
    sequences = {}
    for key in range(n):
        # A gaussian draw can be negative; clamp so the length is explicit.
        length = max(0, round(random.gauss(mean, sd)))
        # join() avoids rebuilding the string on every appended nucleotide.
        sequences[key] = "".join(random.choice(nucleotides) for _ in range(length))
    return sequences
+
 def write_fasta(sequences, file_path):
    """
-    Takes a dictionary and writes it to a fasta file
-    Must specify the filename when caling the function
+    Takes a dictionary and writes it to a fasta file.
+    Must specify the filename when calling the function.
+
+    Args:
+        sequences (dict): Dictionary of sequences.
+        file_path (str): Path of the FASTA file to write.
+        
    """
    from textwrap import wrap
    with open(file_path, "w") as outfile: