From 204584b0b48e63fc92e3f48a000a8e98e1b90cb7 Mon Sep 17 00:00:00 2001
From: clara <clara@dyn-39-19.mobile.unibas.ch>
Date: Fri, 11 Nov 2022 15:52:38 +0100
Subject: [PATCH] 	modified:   read_sequencer_package/modules.py

---
 read_sequencer_package/modules.py | 64 ++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/modules.py
index ca84a64..a65561a 100644
--- a/read_sequencer_package/modules.py
+++ b/read_sequencer_package/modules.py
@@ -1,11 +1,13 @@
 
 def read_in_fasta(file_path):
     '''
-    This function reads in FASTA files
+    This function reads in FASTA files.
 
-    argument is file_path
+    Args:
+        file_path (str): Path to the FASTA file.
 
-    it returns a dictionary with the sequences
+    Returns:
+        dict: A dictionary with the sequences.
 
     '''
     sequences = {}
@@ -21,13 +23,17 @@ def read_in_fasta(file_path):
     f.close()
     return sequences
 
-def read_sequence(seq, read_length, padding_probabilities=None):
+def read_sequence(seq, read_length):
     '''
-    This function reads sequences
-    arguments: seq is a list of sequences
-    padding_probabilities is a number??
+    This function reads a sequence with a specific read length: it adds additional nucleotides if the
+    sequence is shorter than the requested length, or cuts the sequence if it is longer.
 
-    returns sequenced element
+    Args:
+        seq (str): The sequence to read.
+        read_length (int): Length of the read.
+
+    Returns:
+        str: returns sequenced element
 
     '''
     from random import choice
@@ -45,16 +51,54 @@ def read_sequence(seq, read_length, padding_probabilities=None):
     return sequenced
 
 def simulate_sequencing(sequences, read_length):
+    """
+    Simulates sequencing.
+
+    Args:
+        sequences (dict): Dictionary of sequences to sequence.
+        read_length (int): length of reads
+
+    Returns:
+        dict: The sequenced reads, stored under the same keys as the input sequences.
+    """
     results = {}
     for index, key in enumerate(sequences):
         results[key] = read_sequence(sequences[key],read_length=read_length)
 
     return results
 
+import random
+def generate_sequences(n, mean, sd):
+    """
+    Generates random sequences.
+
+    Args:
+        n (int): Number of sequences to generate.
+        mean (int): Mean sequence length (Gaussian distribution).
+        sd (float): Standard deviation of the sequence length (Gaussian distribution).
+
+    Returns:
+        dict: The n generated sequences, keyed by their index (0 to n - 1).
+    """
+    dict1 = {}
+    for i in range(n):
+        keys = range(n)
+        seq = ""
+        nt = ["A", "T", "C", "G"]
+        for value in range(round(random.gauss(mean, sd))):
+            seq = seq + random.choice(nt)
+        dict1[keys[i]] = seq
+    return dict1
+
 def write_fasta(sequences, file_path):
     """
-    Takes a dictionary and writes it to a fasta file
-    Must specify the filename when caling the function
+    Takes a dictionary and writes it to a fasta file.
+    Must specify the filename when calling the function.
+
+    Args:
+        sequences (dict): Dictionary of sequences.
+        file_path (str): Path of the output FASTA file.
+        
     """
     from textwrap import wrap
     with open(file_path, "w") as outfile:
-- 
GitLab