Skip to content
Snippets Groups Projects
Commit 204584b0 authored by clara's avatar clara
Browse files

modified: read_sequencer_package/modules.py

parent 8deb1036
No related branches found
No related tags found
1 merge request: !14 "modified: read_sequencer_package/modules.py"
def read_in_fasta(file_path):
'''
This function reads in FASTA files
This function reads in FASTA files.
argument is file_path
Args:
file_path (str): A file path directing to the fasta file.
it returns a dictionary with the sequences
Returns:
Dict: It returns a dictionary with sequences.
'''
sequences = {}
......@@ -21,13 +23,17 @@ def read_in_fasta(file_path):
f.close()
return sequences
def read_sequence(seq, read_length, padding_probabilities=None):
def read_sequence(seq, read_length):
'''
This function reads sequences
arguments: seq is a list of sequences
padding_probabilities is a number??
This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is
shorter than the requested length, or cuts the sequence if it is longer.
returns sequenced element
Args:
seq (str): the sequence to read
read_length (int): length of reads
Returns:
str: returns sequenced element
'''
from random import choice
......@@ -45,16 +51,54 @@ def read_sequence(seq, read_length, padding_probabilities=None):
return sequenced
def simulate_sequencing(sequences, read_length):
    """
    Simulates sequencing.

    Every sequence in the input dictionary is passed to read_sequence with
    the requested read length, and the result is stored under the same key.

    Args:
        sequences (dict): Dictionary of sequences to sequence.
        read_length (int): length of reads

    Returns:
        dict: the sequenced reads, keyed like the input dictionary
    """
    # Iterate over items directly; no positional index is needed.
    return {
        key: read_sequence(seq, read_length=read_length)
        for key, seq in sequences.items()
    }
import random
def generate_sequences(n, mean, sd):
    """
    Generates random nucleotide sequences.

    The length of each sequence is drawn from a gaussian distribution and
    rounded to the nearest integer. A draw that rounds to zero or to a
    negative number produces an empty sequence.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: of n sequences, keyed by the integers 0 to n - 1
    """
    nucleotides = ("A", "T", "C", "G")
    sequences = {}
    for index in range(n):
        # Draw the length first, then one nucleotide per position, so the
        # order of calls into the random generator stays the same as before.
        length = round(random.gauss(mean, sd))
        # join avoids rebuilding the string on every appended nucleotide.
        sequences[index] = "".join(
            random.choice(nucleotides) for _ in range(length)
        )
    return sequences
def write_fasta(sequences, file_path):
"""
Takes a dictionary and writes it to a fasta file
Must specify the filename when caling the function
Takes a dictionary and writes it to a fasta file.
Must specify the filename when calling the function.
Args:
sequences (dict): Dictionary of sequence.
file_path (str): A file path directing to the output folder.
"""
from textwrap import wrap
with open(file_path, "w") as outfile:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment