modified: read_sequencer_package/modules.py

204584b0 · clara · 8deb1036 · 204584b0
Commit 204584b0 authored 2 years ago by clara
--- a/read_sequencer_package/modules.py
+++ b/read_sequencer_package/modules.py
 def read_in_fasta(file_path):
    '''
-    This function reads in FASTA files
+    This function reads in FASTA files.
-    argument is file_path
+    Args:
+        file_path (str): Path to the FASTA file to read.
-    it returns a dictionary with the sequences
+    Returns:
+        dict: A dictionary containing the sequences read from the file.
    '''
    sequences = {}
@@ -21,13 +23,17 @@ def read_in_fasta(file_path):
    f.close()
    return sequences
-def read_sequence(seq, read_length, padding_probabilities=None):
+def read_sequence(seq, read_length):
    '''
-    This function reads sequences
+    This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is 
-    arguments: seq is a list of sequences
+    smaller than the requested length or cuts the sequence if it is longer.
-    padding_probabilities is a number??
-    returns sequenced element
+    Args:
+        seq (str): the sequence to read 
+        read_length (int): length of reads
+    Returns:
+        str: returns sequenced element
    '''
    from random import choice
@@ -45,16 +51,54 @@ def read_sequence(seq, read_length, padding_probabilities=None):
    return sequenced
 def simulate_sequencing(sequences, read_length):
+    """
+    Simulates sequencing.
+    Args:
+        sequences (dict): Dictionary of sequences to sequence.
+        read_length (int): length of reads
+    Returns:
+        dict: The simulated read for each input sequence, stored under the same key.
+    """
    results = {}
    for index, key in enumerate(sequences):
        results[key] = read_sequence(sequences[key],read_length=read_length)
    return results
+import random
+def generate_sequences(n, mean, sd):
+    """
+    Generates random sequences.
+    Args:
+        n (int): Number of sequences to generate.
+        mean (int): mean length of sequence (gaussian distribution).
+        sd (float): standard deviation of length of sequence (gaussian distribution).
+    Returns:
+        dict: The n generated sequences, keyed by their index (0 to n - 1).
+    """
+    dict1 = {}
+    for i in range(n):
+        keys = range(n)
+        seq = ""
+        nt = ["A", "T", "C", "G"]
+        for value in range(round(random.gauss(mean, sd))):
+            seq = seq + random.choice(nt)
+        dict1[keys[i]] = seq
+    return dict1
 def write_fasta(sequences, file_path):
    """
-    Takes a dictionary and writes it to a fasta file
+    Takes a dictionary and writes it to a fasta file.
-    Must specify the filename when caling the function
+    Must specify the filename when calling the function.
+    Args:
+        sequences (dict): Dictionary of sequences.
+        file_path (str): Path of the output FASTA file to write.
    """
    from textwrap import wrap
    with open(file_path, "w") as outfile: